xref: /llvm-project/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
6; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
7; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX90A-NOTTGSPLIT %s
8; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX90A-TGSPLIT %s
9; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940-NOTTGSPLIT %s
10; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck -check-prefixes=GFX940-TGSPLIT %s
11; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
12; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
13; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
14; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
15
; Kernel under test: an atomic i32 load with syncscope("agent") and `unordered`
; ordering from an addrspace(1) pointer, followed by a plain store of the loaded
; value to a second addrspace(1) pointer.
;
; The assertion blocks embedded in the signature below are autogenerated (one
; block per check prefix used by the llc invocations at the top of this file).
; Regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
;
; In the expectations recorded here the load instruction carries no cache-policy
; modifier, and no cache-invalidate instruction follows it in any configuration.
16define amdgpu_kernel void @global_agent_unordered_load(
17; GFX6-LABEL: global_agent_unordered_load:
18; GFX6:       ; %bb.0: ; %entry
19; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
20; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
21; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
22; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
23; GFX6-NEXT:    s_mov_b32 s6, s9
24; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
25; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
26; GFX6-NEXT:    s_mov_b32 s13, -1
27; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
28; GFX6-NEXT:    s_mov_b32 s9, s6
29; GFX6-NEXT:    s_mov_b32 s10, s13
30; GFX6-NEXT:    s_mov_b32 s11, s12
31; GFX6-NEXT:    s_mov_b32 s14, s5
32; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
33; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
34; GFX6-NEXT:    s_mov_b32 s5, s14
35; GFX6-NEXT:    s_mov_b32 s6, s13
36; GFX6-NEXT:    s_mov_b32 s7, s12
37; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0
38; GFX6-NEXT:    s_waitcnt vmcnt(0)
39; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
40; GFX6-NEXT:    s_endpgm
41;
42; GFX7-LABEL: global_agent_unordered_load:
43; GFX7:       ; %bb.0: ; %entry
44; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
45; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
46; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
47; GFX7-NEXT:    v_mov_b32_e32 v0, s6
48; GFX7-NEXT:    v_mov_b32_e32 v1, s7
49; GFX7-NEXT:    flat_load_dword v2, v[0:1]
50; GFX7-NEXT:    v_mov_b32_e32 v0, s4
51; GFX7-NEXT:    v_mov_b32_e32 v1, s5
52; GFX7-NEXT:    s_waitcnt vmcnt(0)
53; GFX7-NEXT:    flat_store_dword v[0:1], v2
54; GFX7-NEXT:    s_endpgm
55;
56; GFX10-WGP-LABEL: global_agent_unordered_load:
57; GFX10-WGP:       ; %bb.0: ; %entry
58; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
59; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
60; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
61; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
62; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7]
63; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
64; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
65; GFX10-WGP-NEXT:    s_endpgm
66;
67; GFX10-CU-LABEL: global_agent_unordered_load:
68; GFX10-CU:       ; %bb.0: ; %entry
69; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
70; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
71; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
72; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
73; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7]
74; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
75; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
76; GFX10-CU-NEXT:    s_endpgm
77;
78; SKIP-CACHE-INV-LABEL: global_agent_unordered_load:
79; SKIP-CACHE-INV:       ; %bb.0: ; %entry
80; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
81; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
82; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
83; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
84; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
85; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
86; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
87; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
88; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
89; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
90; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
91; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
92; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
93; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
94; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
95; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
96; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
97; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
98; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0
99; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
100; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
101; SKIP-CACHE-INV-NEXT:    s_endpgm
102;
103; GFX90A-NOTTGSPLIT-LABEL: global_agent_unordered_load:
104; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
105; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
106; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
107; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
108; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
109; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
110; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
111; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
112; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
113;
114; GFX90A-TGSPLIT-LABEL: global_agent_unordered_load:
115; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
116; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
117; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
118; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
119; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
120; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
121; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
122; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
123; GFX90A-TGSPLIT-NEXT:    s_endpgm
124;
125; GFX940-NOTTGSPLIT-LABEL: global_agent_unordered_load:
126; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
127; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
128; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
129; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
130; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
131; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
132; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
133; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
134; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
135;
136; GFX940-TGSPLIT-LABEL: global_agent_unordered_load:
137; GFX940-TGSPLIT:       ; %bb.0: ; %entry
138; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
139; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
140; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
141; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
142; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
143; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
144; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
145; GFX940-TGSPLIT-NEXT:    s_endpgm
146;
147; GFX11-WGP-LABEL: global_agent_unordered_load:
148; GFX11-WGP:       ; %bb.0: ; %entry
149; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
150; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
151; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
152; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
153; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
154; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
155; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
156; GFX11-WGP-NEXT:    s_endpgm
157;
158; GFX11-CU-LABEL: global_agent_unordered_load:
159; GFX11-CU:       ; %bb.0: ; %entry
160; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
161; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
162; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
163; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
164; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
165; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
166; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
167; GFX11-CU-NEXT:    s_endpgm
168;
169; GFX12-WGP-LABEL: global_agent_unordered_load:
170; GFX12-WGP:       ; %bb.0: ; %entry
171; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
172; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
173; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
174; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
175; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
176; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
177; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
178; GFX12-WGP-NEXT:    s_endpgm
179;
180; GFX12-CU-LABEL: global_agent_unordered_load:
181; GFX12-CU:       ; %bb.0: ; %entry
182; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
183; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
184; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
185; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
186; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
187; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
188; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
189; GFX12-CU-NEXT:    s_endpgm
190    ptr addrspace(1) %in, ptr addrspace(1) %out) {
191entry:
  ; Operation under test: agent-scope atomic load with unordered ordering.
192  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") unordered, align 4
  ; Plain (non-atomic) store that keeps the loaded value live.
193  store i32 %val, ptr addrspace(1) %out
194  ret void
195}
196
; Kernel under test: an atomic i32 load with syncscope("agent") and `monotonic`
; ordering from an addrspace(1) pointer, followed by a plain store of the loaded
; value to a second addrspace(1) pointer.
;
; The assertion blocks embedded in the signature below are autogenerated (one
; block per check prefix used by the llc invocations at the top of this file).
; Regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
;
; In the expectations recorded here the load instruction carries a cache-policy
; modifier in every configuration (glc, glc dlc, sc1, or scope:SCOPE_DEV
; depending on the target), and no cache-invalidate instruction follows it.
197define amdgpu_kernel void @global_agent_monotonic_load(
198; GFX6-LABEL: global_agent_monotonic_load:
199; GFX6:       ; %bb.0: ; %entry
200; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
201; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
202; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
203; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
204; GFX6-NEXT:    s_mov_b32 s6, s9
205; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
206; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
207; GFX6-NEXT:    s_mov_b32 s13, -1
208; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
209; GFX6-NEXT:    s_mov_b32 s9, s6
210; GFX6-NEXT:    s_mov_b32 s10, s13
211; GFX6-NEXT:    s_mov_b32 s11, s12
212; GFX6-NEXT:    s_mov_b32 s14, s5
213; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
214; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
215; GFX6-NEXT:    s_mov_b32 s5, s14
216; GFX6-NEXT:    s_mov_b32 s6, s13
217; GFX6-NEXT:    s_mov_b32 s7, s12
218; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
219; GFX6-NEXT:    s_waitcnt vmcnt(0)
220; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
221; GFX6-NEXT:    s_endpgm
222;
223; GFX7-LABEL: global_agent_monotonic_load:
224; GFX7:       ; %bb.0: ; %entry
225; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
226; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
227; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
228; GFX7-NEXT:    v_mov_b32_e32 v0, s6
229; GFX7-NEXT:    v_mov_b32_e32 v1, s7
230; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
231; GFX7-NEXT:    v_mov_b32_e32 v0, s4
232; GFX7-NEXT:    v_mov_b32_e32 v1, s5
233; GFX7-NEXT:    s_waitcnt vmcnt(0)
234; GFX7-NEXT:    flat_store_dword v[0:1], v2
235; GFX7-NEXT:    s_endpgm
236;
237; GFX10-WGP-LABEL: global_agent_monotonic_load:
238; GFX10-WGP:       ; %bb.0: ; %entry
239; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
240; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
241; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
242; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
243; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
244; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
245; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
246; GFX10-WGP-NEXT:    s_endpgm
247;
248; GFX10-CU-LABEL: global_agent_monotonic_load:
249; GFX10-CU:       ; %bb.0: ; %entry
250; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
251; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
252; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
253; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
254; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
255; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
256; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
257; GFX10-CU-NEXT:    s_endpgm
258;
259; SKIP-CACHE-INV-LABEL: global_agent_monotonic_load:
260; SKIP-CACHE-INV:       ; %bb.0: ; %entry
261; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
262; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
263; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
264; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
265; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
266; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
267; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
268; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
269; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
270; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
271; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
272; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
273; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
274; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
275; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
276; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
277; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
278; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
279; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
280; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
281; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
282; SKIP-CACHE-INV-NEXT:    s_endpgm
283;
284; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_load:
285; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
286; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
287; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
288; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
289; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
290; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
291; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
292; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
293; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
294;
295; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_load:
296; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
297; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
298; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
299; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
300; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
301; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
302; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
303; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
304; GFX90A-TGSPLIT-NEXT:    s_endpgm
305;
306; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_load:
307; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
308; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
309; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
310; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
311; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
312; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
313; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
314; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
315; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
316;
317; GFX940-TGSPLIT-LABEL: global_agent_monotonic_load:
318; GFX940-TGSPLIT:       ; %bb.0: ; %entry
319; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
320; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
321; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
322; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
323; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
324; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
325; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
326; GFX940-TGSPLIT-NEXT:    s_endpgm
327;
328; GFX11-WGP-LABEL: global_agent_monotonic_load:
329; GFX11-WGP:       ; %bb.0: ; %entry
330; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
331; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
332; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
333; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
334; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
335; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
336; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
337; GFX11-WGP-NEXT:    s_endpgm
338;
339; GFX11-CU-LABEL: global_agent_monotonic_load:
340; GFX11-CU:       ; %bb.0: ; %entry
341; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
342; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
343; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
344; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
345; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
346; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
347; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
348; GFX11-CU-NEXT:    s_endpgm
349;
350; GFX12-WGP-LABEL: global_agent_monotonic_load:
351; GFX12-WGP:       ; %bb.0: ; %entry
352; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
353; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
354; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
355; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
356; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
357; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
358; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
359; GFX12-WGP-NEXT:    s_endpgm
360;
361; GFX12-CU-LABEL: global_agent_monotonic_load:
362; GFX12-CU:       ; %bb.0: ; %entry
363; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
364; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
365; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
366; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
367; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
368; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
369; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
370; GFX12-CU-NEXT:    s_endpgm
371    ptr addrspace(1) %in, ptr addrspace(1) %out) {
372entry:
  ; Operation under test: agent-scope atomic load with monotonic ordering.
373  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") monotonic, align 4
  ; Plain (non-atomic) store that keeps the loaded value live.
374  store i32 %val, ptr addrspace(1) %out
375  ret void
376}
377
; Kernel under test: an atomic i32 load with syncscope("agent") and `acquire`
; ordering from an addrspace(1) pointer, followed by a plain store of the loaded
; value to a second addrspace(1) pointer.
;
; The assertion blocks embedded in the signature below are autogenerated (one
; block per check prefix used by the llc invocations at the top of this file).
; Regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
;
; In the expectations recorded here the load carries a cache-policy modifier
; and, after the wait on the load result, a cache-invalidate instruction is
; expected before the store (buffer_wbinvl1, buffer_wbinvl1_vol,
; buffer_gl1_inv + buffer_gl0_inv, buffer_inv sc1, or global_inv
; scope:SCOPE_DEV depending on the target). The one exception is the
; configuration run with -amdgcn-skip-cache-invalidations, whose expected
; output has no invalidate instruction.
378define amdgpu_kernel void @global_agent_acquire_load(
379; GFX6-LABEL: global_agent_acquire_load:
380; GFX6:       ; %bb.0: ; %entry
381; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
382; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
383; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
384; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
385; GFX6-NEXT:    s_mov_b32 s6, s9
386; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
387; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
388; GFX6-NEXT:    s_mov_b32 s13, -1
389; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
390; GFX6-NEXT:    s_mov_b32 s9, s6
391; GFX6-NEXT:    s_mov_b32 s10, s13
392; GFX6-NEXT:    s_mov_b32 s11, s12
393; GFX6-NEXT:    s_mov_b32 s14, s5
394; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
395; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
396; GFX6-NEXT:    s_mov_b32 s5, s14
397; GFX6-NEXT:    s_mov_b32 s6, s13
398; GFX6-NEXT:    s_mov_b32 s7, s12
399; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
400; GFX6-NEXT:    s_waitcnt vmcnt(0)
401; GFX6-NEXT:    buffer_wbinvl1
402; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
403; GFX6-NEXT:    s_endpgm
404;
405; GFX7-LABEL: global_agent_acquire_load:
406; GFX7:       ; %bb.0: ; %entry
407; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
408; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
409; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
410; GFX7-NEXT:    v_mov_b32_e32 v0, s6
411; GFX7-NEXT:    v_mov_b32_e32 v1, s7
412; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
413; GFX7-NEXT:    s_waitcnt vmcnt(0)
414; GFX7-NEXT:    buffer_wbinvl1_vol
415; GFX7-NEXT:    v_mov_b32_e32 v0, s4
416; GFX7-NEXT:    v_mov_b32_e32 v1, s5
417; GFX7-NEXT:    flat_store_dword v[0:1], v2
418; GFX7-NEXT:    s_endpgm
419;
420; GFX10-WGP-LABEL: global_agent_acquire_load:
421; GFX10-WGP:       ; %bb.0: ; %entry
422; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
423; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
424; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
425; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
426; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
427; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
428; GFX10-WGP-NEXT:    buffer_gl1_inv
429; GFX10-WGP-NEXT:    buffer_gl0_inv
430; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
431; GFX10-WGP-NEXT:    s_endpgm
432;
433; GFX10-CU-LABEL: global_agent_acquire_load:
434; GFX10-CU:       ; %bb.0: ; %entry
435; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
436; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
437; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
438; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
439; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
440; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
441; GFX10-CU-NEXT:    buffer_gl1_inv
442; GFX10-CU-NEXT:    buffer_gl0_inv
443; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
444; GFX10-CU-NEXT:    s_endpgm
445;
446; SKIP-CACHE-INV-LABEL: global_agent_acquire_load:
447; SKIP-CACHE-INV:       ; %bb.0: ; %entry
448; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
449; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
450; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
451; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
452; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
453; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
454; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
455; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
456; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
457; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
458; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
459; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
460; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
461; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
462; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
463; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
464; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
465; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
466; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
467; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
468; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
469; SKIP-CACHE-INV-NEXT:    s_endpgm
470;
471; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_load:
472; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
473; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
474; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
475; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
476; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
477; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
478; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
479; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
480; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
481; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
482;
483; GFX90A-TGSPLIT-LABEL: global_agent_acquire_load:
484; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
485; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
486; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
487; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
488; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
489; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
490; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
491; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
492; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
493; GFX90A-TGSPLIT-NEXT:    s_endpgm
494;
495; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_load:
496; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
497; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
498; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
499; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
500; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
501; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
502; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
503; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
504; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
505; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
506;
507; GFX940-TGSPLIT-LABEL: global_agent_acquire_load:
508; GFX940-TGSPLIT:       ; %bb.0: ; %entry
509; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
510; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
511; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
512; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
513; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
514; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
515; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
516; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
517; GFX940-TGSPLIT-NEXT:    s_endpgm
518;
519; GFX11-WGP-LABEL: global_agent_acquire_load:
520; GFX11-WGP:       ; %bb.0: ; %entry
521; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
522; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
523; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
524; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
525; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
526; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
527; GFX11-WGP-NEXT:    buffer_gl1_inv
528; GFX11-WGP-NEXT:    buffer_gl0_inv
529; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
530; GFX11-WGP-NEXT:    s_endpgm
531;
532; GFX11-CU-LABEL: global_agent_acquire_load:
533; GFX11-CU:       ; %bb.0: ; %entry
534; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
535; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
536; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
537; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
538; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
539; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
540; GFX11-CU-NEXT:    buffer_gl1_inv
541; GFX11-CU-NEXT:    buffer_gl0_inv
542; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
543; GFX11-CU-NEXT:    s_endpgm
544;
545; GFX12-WGP-LABEL: global_agent_acquire_load:
546; GFX12-WGP:       ; %bb.0: ; %entry
547; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
548; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
549; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
550; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
551; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
552; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
553; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
554; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
555; GFX12-WGP-NEXT:    s_endpgm
556;
557; GFX12-CU-LABEL: global_agent_acquire_load:
558; GFX12-CU:       ; %bb.0: ; %entry
559; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
560; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
561; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
562; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
563; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
564; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
565; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
566; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
567; GFX12-CU-NEXT:    s_endpgm
568    ptr addrspace(1) %in, ptr addrspace(1) %out) {
569entry:
  ; Operation under test: agent-scope atomic load with acquire ordering.
570  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") acquire, align 4
  ; Plain (non-atomic) store that keeps the loaded value live.
571  store i32 %val, ptr addrspace(1) %out
572  ret void
573}
574
575define amdgpu_kernel void @global_agent_seq_cst_load(
576; GFX6-LABEL: global_agent_seq_cst_load:
577; GFX6:       ; %bb.0: ; %entry
578; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
579; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
580; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
581; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
582; GFX6-NEXT:    s_mov_b32 s6, s9
583; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
584; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
585; GFX6-NEXT:    s_mov_b32 s13, -1
586; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
587; GFX6-NEXT:    s_mov_b32 s9, s6
588; GFX6-NEXT:    s_mov_b32 s10, s13
589; GFX6-NEXT:    s_mov_b32 s11, s12
590; GFX6-NEXT:    s_mov_b32 s14, s5
591; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
592; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
593; GFX6-NEXT:    s_mov_b32 s5, s14
594; GFX6-NEXT:    s_mov_b32 s6, s13
595; GFX6-NEXT:    s_mov_b32 s7, s12
596; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
597; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
598; GFX6-NEXT:    s_waitcnt vmcnt(0)
599; GFX6-NEXT:    buffer_wbinvl1
600; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
601; GFX6-NEXT:    s_endpgm
602;
603; GFX7-LABEL: global_agent_seq_cst_load:
604; GFX7:       ; %bb.0: ; %entry
605; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
606; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
607; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
608; GFX7-NEXT:    v_mov_b32_e32 v0, s6
609; GFX7-NEXT:    v_mov_b32_e32 v1, s7
610; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
611; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
612; GFX7-NEXT:    s_waitcnt vmcnt(0)
613; GFX7-NEXT:    buffer_wbinvl1_vol
614; GFX7-NEXT:    v_mov_b32_e32 v0, s4
615; GFX7-NEXT:    v_mov_b32_e32 v1, s5
616; GFX7-NEXT:    flat_store_dword v[0:1], v2
617; GFX7-NEXT:    s_endpgm
618;
619; GFX10-WGP-LABEL: global_agent_seq_cst_load:
620; GFX10-WGP:       ; %bb.0: ; %entry
621; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
622; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
623; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
624; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
625; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
626; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
627; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
628; GFX10-WGP-NEXT:    buffer_gl1_inv
629; GFX10-WGP-NEXT:    buffer_gl0_inv
630; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
631; GFX10-WGP-NEXT:    s_endpgm
632;
633; GFX10-CU-LABEL: global_agent_seq_cst_load:
634; GFX10-CU:       ; %bb.0: ; %entry
635; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
636; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
637; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
638; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
639; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
640; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
641; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
642; GFX10-CU-NEXT:    buffer_gl1_inv
643; GFX10-CU-NEXT:    buffer_gl0_inv
644; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
645; GFX10-CU-NEXT:    s_endpgm
646;
647; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_load:
648; SKIP-CACHE-INV:       ; %bb.0: ; %entry
649; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
650; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
651; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
652; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
653; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
654; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
655; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
656; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
657; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
658; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
659; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
660; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
661; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
662; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
663; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
664; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
665; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
666; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
667; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
668; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
669; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
670; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
671; SKIP-CACHE-INV-NEXT:    s_endpgm
672;
673; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_load:
674; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
675; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
676; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
677; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
678; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
679; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
680; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
681; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
682; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
683; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
684;
685; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_load:
686; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
687; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
688; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
689; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
690; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
691; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
692; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
693; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
694; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
695; GFX90A-TGSPLIT-NEXT:    s_endpgm
696;
697; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_load:
698; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
699; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
700; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
701; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
702; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
703; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
704; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
705; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
706; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
707; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
708;
709; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_load:
710; GFX940-TGSPLIT:       ; %bb.0: ; %entry
711; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
712; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
713; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
714; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
715; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
716; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
717; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
718; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
719; GFX940-TGSPLIT-NEXT:    s_endpgm
720;
721; GFX11-WGP-LABEL: global_agent_seq_cst_load:
722; GFX11-WGP:       ; %bb.0: ; %entry
723; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
724; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
725; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
726; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
727; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
728; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
729; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
730; GFX11-WGP-NEXT:    buffer_gl1_inv
731; GFX11-WGP-NEXT:    buffer_gl0_inv
732; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
733; GFX11-WGP-NEXT:    s_endpgm
734;
735; GFX11-CU-LABEL: global_agent_seq_cst_load:
736; GFX11-CU:       ; %bb.0: ; %entry
737; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
738; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
739; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
740; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
741; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
742; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
743; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
744; GFX11-CU-NEXT:    buffer_gl1_inv
745; GFX11-CU-NEXT:    buffer_gl0_inv
746; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
747; GFX11-CU-NEXT:    s_endpgm
748;
749; GFX12-WGP-LABEL: global_agent_seq_cst_load:
750; GFX12-WGP:       ; %bb.0: ; %entry
751; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
752; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
753; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
754; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
755; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
756; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
757; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
758; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
759; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
760; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
761; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
762; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
763; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
764; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
765; GFX12-WGP-NEXT:    s_endpgm
766;
767; GFX12-CU-LABEL: global_agent_seq_cst_load:
768; GFX12-CU:       ; %bb.0: ; %entry
769; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
770; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
771; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
772; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
773; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
774; GFX12-CU-NEXT:    s_wait_storecnt 0x0
775; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
776; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
777; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
778; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
779; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
780; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
781; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
782; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
783; GFX12-CU-NEXT:    s_endpgm
784    ptr addrspace(1) %in, ptr addrspace(1) %out) {
785entry:
786  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent") seq_cst, align 4
787  store i32 %val, ptr addrspace(1) %out
788  ret void
789}
790
; Atomic i32 store to a global (addrspace(1)) pointer with syncscope("agent")
; and unordered ordering.
; In the recorded output for every configuration the store is emitted with no
; wait or cache-maintenance instruction added in front of it; the only wait is
; the single lgkmcnt/kmcnt wait after the leading scalar loads. The gfx940
; variants carry "sc0 sc1" on the store, and the gfx12 variants carry no scope
; modifier.
; The assertions placed inside the signature are autogenerated (see the note on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
791define amdgpu_kernel void @global_agent_unordered_store(
792; GFX6-LABEL: global_agent_unordered_store:
793; GFX6:       ; %bb.0: ; %entry
794; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
795; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
796; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
797; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
798; GFX6-NEXT:    s_mov_b32 s11, s5
799; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
800; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
801; GFX6-NEXT:    s_mov_b32 s10, -1
802; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
803; GFX6-NEXT:    s_mov_b32 s5, s11
804; GFX6-NEXT:    s_mov_b32 s6, s10
805; GFX6-NEXT:    s_mov_b32 s7, s9
806; GFX6-NEXT:    v_mov_b32_e32 v0, s8
807; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
808; GFX6-NEXT:    s_endpgm
809;
810; GFX7-LABEL: global_agent_unordered_store:
811; GFX7:       ; %bb.0: ; %entry
812; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
813; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
814; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
815; GFX7-NEXT:    v_mov_b32_e32 v0, s6
816; GFX7-NEXT:    v_mov_b32_e32 v1, s7
817; GFX7-NEXT:    v_mov_b32_e32 v2, s4
818; GFX7-NEXT:    flat_store_dword v[0:1], v2
819; GFX7-NEXT:    s_endpgm
820;
821; GFX10-WGP-LABEL: global_agent_unordered_store:
822; GFX10-WGP:       ; %bb.0: ; %entry
823; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
824; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
825; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
826; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
827; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
828; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
829; GFX10-WGP-NEXT:    s_endpgm
830;
831; GFX10-CU-LABEL: global_agent_unordered_store:
832; GFX10-CU:       ; %bb.0: ; %entry
833; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
834; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
835; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
836; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
837; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
838; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
839; GFX10-CU-NEXT:    s_endpgm
840;
841; SKIP-CACHE-INV-LABEL: global_agent_unordered_store:
842; SKIP-CACHE-INV:       ; %bb.0: ; %entry
843; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
844; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
845; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
846; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
847; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
848; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
849; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
850; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
851; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
852; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
853; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
854; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
855; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
856; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
857; SKIP-CACHE-INV-NEXT:    s_endpgm
858;
859; GFX90A-NOTTGSPLIT-LABEL: global_agent_unordered_store:
860; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
861; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
862; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
863; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
864; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
865; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
866; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
867; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
868;
869; GFX90A-TGSPLIT-LABEL: global_agent_unordered_store:
870; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
871; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
872; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
873; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
874; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
875; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
876; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
877; GFX90A-TGSPLIT-NEXT:    s_endpgm
878;
879; GFX940-NOTTGSPLIT-LABEL: global_agent_unordered_store:
880; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
881; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
882; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
883; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
884; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
885; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
886; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
887; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
888;
889; GFX940-TGSPLIT-LABEL: global_agent_unordered_store:
890; GFX940-TGSPLIT:       ; %bb.0: ; %entry
891; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
892; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
893; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
894; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
895; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
896; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
897; GFX940-TGSPLIT-NEXT:    s_endpgm
898;
899; GFX11-WGP-LABEL: global_agent_unordered_store:
900; GFX11-WGP:       ; %bb.0: ; %entry
901; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
902; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
903; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
904; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
905; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
906; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
907; GFX11-WGP-NEXT:    s_endpgm
908;
909; GFX11-CU-LABEL: global_agent_unordered_store:
910; GFX11-CU:       ; %bb.0: ; %entry
911; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
912; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
913; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
914; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
915; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
916; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
917; GFX11-CU-NEXT:    s_endpgm
918;
919; GFX12-WGP-LABEL: global_agent_unordered_store:
920; GFX12-WGP:       ; %bb.0: ; %entry
921; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
922; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
923; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
924; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
925; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
926; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
927; GFX12-WGP-NEXT:    s_endpgm
928;
929; GFX12-CU-LABEL: global_agent_unordered_store:
930; GFX12-CU:       ; %bb.0: ; %entry
931; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
932; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
933; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
934; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
935; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
936; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
937; GFX12-CU-NEXT:    s_endpgm
938    i32 %in, ptr addrspace(1) %out) {
939entry:
940  store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") unordered, align 4
941  ret void
942}
943
; Atomic i32 store to a global (addrspace(1)) pointer with syncscope("agent")
; and monotonic ordering.
; In the recorded output for every configuration the store is emitted with no
; wait or cache-maintenance instruction added in front of it; the only wait is
; the single lgkmcnt/kmcnt wait after the leading scalar loads. The gfx940
; variants carry "sc0 sc1" on the store, and the gfx12 variants carry
; "scope:SCOPE_DEV".
; The assertions placed inside the signature are autogenerated (see the note on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
944define amdgpu_kernel void @global_agent_monotonic_store(
945; GFX6-LABEL: global_agent_monotonic_store:
946; GFX6:       ; %bb.0: ; %entry
947; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
948; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
949; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
950; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
951; GFX6-NEXT:    s_mov_b32 s11, s5
952; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
953; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
954; GFX6-NEXT:    s_mov_b32 s10, -1
955; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
956; GFX6-NEXT:    s_mov_b32 s5, s11
957; GFX6-NEXT:    s_mov_b32 s6, s10
958; GFX6-NEXT:    s_mov_b32 s7, s9
959; GFX6-NEXT:    v_mov_b32_e32 v0, s8
960; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
961; GFX6-NEXT:    s_endpgm
962;
963; GFX7-LABEL: global_agent_monotonic_store:
964; GFX7:       ; %bb.0: ; %entry
965; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
966; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
967; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
968; GFX7-NEXT:    v_mov_b32_e32 v0, s6
969; GFX7-NEXT:    v_mov_b32_e32 v1, s7
970; GFX7-NEXT:    v_mov_b32_e32 v2, s4
971; GFX7-NEXT:    flat_store_dword v[0:1], v2
972; GFX7-NEXT:    s_endpgm
973;
974; GFX10-WGP-LABEL: global_agent_monotonic_store:
975; GFX10-WGP:       ; %bb.0: ; %entry
976; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
977; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
978; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
979; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
980; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
981; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
982; GFX10-WGP-NEXT:    s_endpgm
983;
984; GFX10-CU-LABEL: global_agent_monotonic_store:
985; GFX10-CU:       ; %bb.0: ; %entry
986; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
987; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
988; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
989; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
990; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
991; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
992; GFX10-CU-NEXT:    s_endpgm
993;
994; SKIP-CACHE-INV-LABEL: global_agent_monotonic_store:
995; SKIP-CACHE-INV:       ; %bb.0: ; %entry
996; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
997; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
998; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
999; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1000; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1001; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1002; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1003; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1004; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1005; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1006; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1007; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1008; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1009; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1010; SKIP-CACHE-INV-NEXT:    s_endpgm
1011;
1012; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_store:
1013; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1014; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1015; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1016; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1017; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1018; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1019; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1020; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1021;
1022; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_store:
1023; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1024; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1025; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1026; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1027; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1028; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1029; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1030; GFX90A-TGSPLIT-NEXT:    s_endpgm
1031;
1032; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_store:
1033; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1034; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1035; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1036; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1037; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1038; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1039; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1040; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1041;
1042; GFX940-TGSPLIT-LABEL: global_agent_monotonic_store:
1043; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1044; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1045; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1046; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1047; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1048; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1049; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1050; GFX940-TGSPLIT-NEXT:    s_endpgm
1051;
1052; GFX11-WGP-LABEL: global_agent_monotonic_store:
1053; GFX11-WGP:       ; %bb.0: ; %entry
1054; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1055; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1056; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1057; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1058; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1059; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
1060; GFX11-WGP-NEXT:    s_endpgm
1061;
1062; GFX11-CU-LABEL: global_agent_monotonic_store:
1063; GFX11-CU:       ; %bb.0: ; %entry
1064; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1065; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1066; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1067; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1068; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1069; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
1070; GFX11-CU-NEXT:    s_endpgm
1071;
1072; GFX12-WGP-LABEL: global_agent_monotonic_store:
1073; GFX12-WGP:       ; %bb.0: ; %entry
1074; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1075; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1076; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1077; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1078; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1079; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1080; GFX12-WGP-NEXT:    s_endpgm
1081;
1082; GFX12-CU-LABEL: global_agent_monotonic_store:
1083; GFX12-CU:       ; %bb.0: ; %entry
1084; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1085; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1086; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1087; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1088; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1089; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1090; GFX12-CU-NEXT:    s_endpgm
1091    i32 %in, ptr addrspace(1) %out) {
1092entry:
1093  store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") monotonic, align 4
1094  ret void
1095}
1096
; Atomic i32 store to a global (addrspace(1)) pointer with syncscope("agent")
; and release ordering.
; In the recorded output every configuration places a wait immediately before
; the store, in addition to the lgkmcnt/kmcnt wait after the leading scalar
; loads. What precedes the store in each family of variants:
;   - gfx6, gfx7, gfx90a and the skip-cache-invalidations run have
;     "s_waitcnt vmcnt(0) lgkmcnt(0)".
;   - gfx10 and gfx11 have the same wait followed by
;     "s_waitcnt_vscnt null, 0x0".
;   - gfx940 has "buffer_wbl2 sc1" ahead of the wait, and the store keeps
;     "sc0 sc1".
;   - gfx12 has s_wait_bvhcnt, s_wait_samplecnt, s_wait_storecnt and
;     s_wait_loadcnt_dscnt (all 0x0), and the store carries "scope:SCOPE_DEV".
; The assertions placed inside the signature are autogenerated (see the note on
; the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1097define amdgpu_kernel void @global_agent_release_store(
1098; GFX6-LABEL: global_agent_release_store:
1099; GFX6:       ; %bb.0: ; %entry
1100; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
1101; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
1102; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
1103; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1104; GFX6-NEXT:    s_mov_b32 s11, s5
1105; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1106; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1107; GFX6-NEXT:    s_mov_b32 s10, -1
1108; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1109; GFX6-NEXT:    s_mov_b32 s5, s11
1110; GFX6-NEXT:    s_mov_b32 s6, s10
1111; GFX6-NEXT:    s_mov_b32 s7, s9
1112; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1113; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1114; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1115; GFX6-NEXT:    s_endpgm
1116;
1117; GFX7-LABEL: global_agent_release_store:
1118; GFX7:       ; %bb.0: ; %entry
1119; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
1120; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
1121; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1122; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1123; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1124; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1125; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1126; GFX7-NEXT:    flat_store_dword v[0:1], v2
1127; GFX7-NEXT:    s_endpgm
1128;
1129; GFX10-WGP-LABEL: global_agent_release_store:
1130; GFX10-WGP:       ; %bb.0: ; %entry
1131; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
1132; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1133; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1134; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1135; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1136; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1137; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1138; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
1139; GFX10-WGP-NEXT:    s_endpgm
1140;
1141; GFX10-CU-LABEL: global_agent_release_store:
1142; GFX10-CU:       ; %bb.0: ; %entry
1143; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
1144; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1145; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1146; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1147; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1148; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1149; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1150; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
1151; GFX10-CU-NEXT:    s_endpgm
1152;
1153; SKIP-CACHE-INV-LABEL: global_agent_release_store:
1154; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1155; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
1156; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
1157; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
1158; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1159; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1160; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1161; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1162; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1163; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1164; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1165; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1166; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1167; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1168; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1169; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1170; SKIP-CACHE-INV-NEXT:    s_endpgm
1171;
1172; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_store:
1173; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1174; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1175; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1176; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1177; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1178; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1179; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1180; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1181; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1182;
1183; GFX90A-TGSPLIT-LABEL: global_agent_release_store:
1184; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1185; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1186; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1187; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1188; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1189; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1190; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1191; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1192; GFX90A-TGSPLIT-NEXT:    s_endpgm
1193;
1194; GFX940-NOTTGSPLIT-LABEL: global_agent_release_store:
1195; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1196; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1197; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1198; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1199; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1200; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1201; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
1202; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1203; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1204; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1205;
1206; GFX940-TGSPLIT-LABEL: global_agent_release_store:
1207; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1208; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1209; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1210; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1211; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1212; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1213; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
1214; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1215; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1216; GFX940-TGSPLIT-NEXT:    s_endpgm
1217;
1218; GFX11-WGP-LABEL: global_agent_release_store:
1219; GFX11-WGP:       ; %bb.0: ; %entry
1220; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1221; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1222; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1223; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1224; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1225; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1226; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1227; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
1228; GFX11-WGP-NEXT:    s_endpgm
1229;
1230; GFX11-CU-LABEL: global_agent_release_store:
1231; GFX11-CU:       ; %bb.0: ; %entry
1232; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1233; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1234; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1235; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1236; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1237; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1238; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1239; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
1240; GFX11-CU-NEXT:    s_endpgm
1241;
1242; GFX12-WGP-LABEL: global_agent_release_store:
1243; GFX12-WGP:       ; %bb.0: ; %entry
1244; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1245; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1246; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1247; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1248; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1249; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
1250; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
1251; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
1252; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
1253; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1254; GFX12-WGP-NEXT:    s_endpgm
1255;
1256; GFX12-CU-LABEL: global_agent_release_store:
1257; GFX12-CU:       ; %bb.0: ; %entry
1258; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1259; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1260; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1261; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1262; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1263; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
1264; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
1265; GFX12-CU-NEXT:    s_wait_storecnt 0x0
1266; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
1267; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1268; GFX12-CU-NEXT:    s_endpgm
1269    i32 %in, ptr addrspace(1) %out) {
1270entry:
1271  store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") release, align 4
1272  ret void
1273}
1274
1275define amdgpu_kernel void @global_agent_seq_cst_store(
1276; GFX6-LABEL: global_agent_seq_cst_store:
1277; GFX6:       ; %bb.0: ; %entry
1278; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
1279; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
1280; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
1281; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1282; GFX6-NEXT:    s_mov_b32 s11, s5
1283; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1284; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1285; GFX6-NEXT:    s_mov_b32 s10, -1
1286; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1287; GFX6-NEXT:    s_mov_b32 s5, s11
1288; GFX6-NEXT:    s_mov_b32 s6, s10
1289; GFX6-NEXT:    s_mov_b32 s7, s9
1290; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1291; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1292; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1293; GFX6-NEXT:    s_endpgm
1294;
1295; GFX7-LABEL: global_agent_seq_cst_store:
1296; GFX7:       ; %bb.0: ; %entry
1297; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
1298; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
1299; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1300; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1301; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1302; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1303; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1304; GFX7-NEXT:    flat_store_dword v[0:1], v2
1305; GFX7-NEXT:    s_endpgm
1306;
1307; GFX10-WGP-LABEL: global_agent_seq_cst_store:
1308; GFX10-WGP:       ; %bb.0: ; %entry
1309; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
1310; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1311; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1312; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1313; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1314; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1315; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1316; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
1317; GFX10-WGP-NEXT:    s_endpgm
1318;
1319; GFX10-CU-LABEL: global_agent_seq_cst_store:
1320; GFX10-CU:       ; %bb.0: ; %entry
1321; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
1322; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1323; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1324; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1325; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1326; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1327; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1328; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
1329; GFX10-CU-NEXT:    s_endpgm
1330;
1331; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_store:
1332; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1333; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
1334; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
1335; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
1336; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1337; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1338; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1339; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1340; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1341; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1342; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1343; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1344; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1345; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1346; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1347; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1348; SKIP-CACHE-INV-NEXT:    s_endpgm
1349;
1350; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_store:
1351; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1352; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1353; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1354; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1355; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1356; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1357; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1358; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1359; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1360;
1361; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_store:
1362; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1363; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1364; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1365; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1366; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1367; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1368; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1369; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1370; GFX90A-TGSPLIT-NEXT:    s_endpgm
1371;
1372; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_store:
1373; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1374; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1375; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1376; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1377; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1378; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1379; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
1380; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1381; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1382; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1383;
1384; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_store:
1385; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1386; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1387; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1388; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1389; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1390; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1391; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
1392; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1393; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1394; GFX940-TGSPLIT-NEXT:    s_endpgm
1395;
1396; GFX11-WGP-LABEL: global_agent_seq_cst_store:
1397; GFX11-WGP:       ; %bb.0: ; %entry
1398; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1399; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1400; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1401; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1402; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1403; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1404; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1405; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
1406; GFX11-WGP-NEXT:    s_endpgm
1407;
1408; GFX11-CU-LABEL: global_agent_seq_cst_store:
1409; GFX11-CU:       ; %bb.0: ; %entry
1410; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1411; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1412; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1413; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1414; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1415; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1416; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1417; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
1418; GFX11-CU-NEXT:    s_endpgm
1419;
1420; GFX12-WGP-LABEL: global_agent_seq_cst_store:
1421; GFX12-WGP:       ; %bb.0: ; %entry
1422; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1423; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1424; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1425; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1426; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1427; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
1428; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
1429; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
1430; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
1431; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1432; GFX12-WGP-NEXT:    s_endpgm
1433;
1434; GFX12-CU-LABEL: global_agent_seq_cst_store:
1435; GFX12-CU:       ; %bb.0: ; %entry
1436; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1437; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1438; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1439; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1440; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1441; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
1442; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
1443; GFX12-CU-NEXT:    s_wait_storecnt 0x0
1444; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
1445; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1446; GFX12-CU-NEXT:    s_endpgm
1447    i32 %in, ptr addrspace(1) %out) {
1448entry:
1449  store atomic i32 %in, ptr addrspace(1) %out syncscope("agent") seq_cst, align 4
1450  ret void
1451}
1452
; Agent-scope *monotonic* atomic exchange on a global (addrspace(1)) pointer.
; For every RUN configuration the expected code is only the atomic swap,
; issued after the wait that follows the scalar loads (s_waitcnt lgkmcnt(0),
; or s_wait_kmcnt 0x0 on gfx1200). No further waits and no cache
; writeback/invalidate instructions are expected around it. The gfx1200
; output carries scope:SCOPE_DEV on the atomic.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1453define amdgpu_kernel void @global_agent_monotonic_atomicrmw(
1454; GFX6-LABEL: global_agent_monotonic_atomicrmw:
1455; GFX6:       ; %bb.0: ; %entry
1456; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1457; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
1458; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1459; GFX6-NEXT:    s_mov_b32 s11, s5
1460; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1461; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1462; GFX6-NEXT:    s_mov_b32 s10, -1
1463; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1464; GFX6-NEXT:    s_mov_b32 s5, s11
1465; GFX6-NEXT:    s_mov_b32 s6, s10
1466; GFX6-NEXT:    s_mov_b32 s7, s9
1467; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1468; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
1469; GFX6-NEXT:    s_endpgm
1470;
1471; GFX7-LABEL: global_agent_monotonic_atomicrmw:
1472; GFX7:       ; %bb.0: ; %entry
1473; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
1474; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
1475; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1476; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1477; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1478; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1479; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
1480; GFX7-NEXT:    s_endpgm
1481;
1482; GFX10-WGP-LABEL: global_agent_monotonic_atomicrmw:
1483; GFX10-WGP:       ; %bb.0: ; %entry
1484; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1485; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1486; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
1487; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1488; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1489; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
1490; GFX10-WGP-NEXT:    s_endpgm
1491;
1492; GFX10-CU-LABEL: global_agent_monotonic_atomicrmw:
1493; GFX10-CU:       ; %bb.0: ; %entry
1494; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1495; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1496; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
1497; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1498; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1499; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
1500; GFX10-CU-NEXT:    s_endpgm
1501;
1502; SKIP-CACHE-INV-LABEL: global_agent_monotonic_atomicrmw:
1503; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1504; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1505; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
1506; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1507; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1508; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1509; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1510; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1511; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1512; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1513; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1514; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1515; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1516; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
1517; SKIP-CACHE-INV-NEXT:    s_endpgm
1518;
1519; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_atomicrmw:
1520; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1521; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1522; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1523; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1524; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1525; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1526; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1527; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1528;
1529; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_atomicrmw:
1530; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1531; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1532; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1533; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1534; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1535; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1536; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1537; GFX90A-TGSPLIT-NEXT:    s_endpgm
1538;
1539; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_atomicrmw:
1540; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1541; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1542; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1543; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1544; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1545; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1546; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
1547; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1548;
1549; GFX940-TGSPLIT-LABEL: global_agent_monotonic_atomicrmw:
1550; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1551; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1552; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1553; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1554; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1555; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1556; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
1557; GFX940-TGSPLIT-NEXT:    s_endpgm
1558;
1559; GFX11-WGP-LABEL: global_agent_monotonic_atomicrmw:
1560; GFX11-WGP:       ; %bb.0: ; %entry
1561; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1562; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1563; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1564; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1565; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1566; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1567; GFX11-WGP-NEXT:    s_endpgm
1568;
1569; GFX11-CU-LABEL: global_agent_monotonic_atomicrmw:
1570; GFX11-CU:       ; %bb.0: ; %entry
1571; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1572; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1573; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1574; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1575; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1576; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1577; GFX11-CU-NEXT:    s_endpgm
1578;
1579; GFX12-WGP-LABEL: global_agent_monotonic_atomicrmw:
1580; GFX12-WGP:       ; %bb.0: ; %entry
1581; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1582; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1583; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1584; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1585; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1586; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1587; GFX12-WGP-NEXT:    s_endpgm
1588;
1589; GFX12-CU-LABEL: global_agent_monotonic_atomicrmw:
1590; GFX12-CU:       ; %bb.0: ; %entry
1591; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1592; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1593; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1594; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1595; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1596; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1597; GFX12-CU-NEXT:    s_endpgm
1598    ptr addrspace(1) %out, i32 %in) {
1599entry:
  ; The value returned by the exchange (%val) is unused; the function returns void.
1600  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") monotonic
1601  ret void
1602}
1603
; Agent-scope *acquire* atomic exchange on a global (addrspace(1)) pointer.
; The expected code issues the atomic swap, then a wait, then a cache
; invalidate before s_endpgm:
;   wait:       s_waitcnt vmcnt(0) on gfx600, gfx700, gfx90a and gfx940;
;               s_waitcnt_vscnt null, 0x0 on gfx1010 and gfx1100;
;               s_wait_storecnt 0x0 on gfx1200.
;   invalidate: buffer_wbinvl1 on gfx600; buffer_wbinvl1_vol on gfx700 and
;               gfx90a; buffer_gl1_inv + buffer_gl0_inv on gfx1010 and
;               gfx1100; buffer_inv sc1 on gfx940; global_inv scope:SCOPE_DEV
;               on gfx1200.
; The run with -amdgcn-skip-cache-invalidations keeps the wait but expects no
; invalidate instruction.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1604define amdgpu_kernel void @global_agent_acquire_atomicrmw(
1605; GFX6-LABEL: global_agent_acquire_atomicrmw:
1606; GFX6:       ; %bb.0: ; %entry
1607; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1608; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
1609; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1610; GFX6-NEXT:    s_mov_b32 s11, s5
1611; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1612; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1613; GFX6-NEXT:    s_mov_b32 s10, -1
1614; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1615; GFX6-NEXT:    s_mov_b32 s5, s11
1616; GFX6-NEXT:    s_mov_b32 s6, s10
1617; GFX6-NEXT:    s_mov_b32 s7, s9
1618; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1619; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
1620; GFX6-NEXT:    s_waitcnt vmcnt(0)
1621; GFX6-NEXT:    buffer_wbinvl1
1622; GFX6-NEXT:    s_endpgm
1623;
1624; GFX7-LABEL: global_agent_acquire_atomicrmw:
1625; GFX7:       ; %bb.0: ; %entry
1626; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
1627; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
1628; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1629; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1630; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1631; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1632; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
1633; GFX7-NEXT:    s_waitcnt vmcnt(0)
1634; GFX7-NEXT:    buffer_wbinvl1_vol
1635; GFX7-NEXT:    s_endpgm
1636;
1637; GFX10-WGP-LABEL: global_agent_acquire_atomicrmw:
1638; GFX10-WGP:       ; %bb.0: ; %entry
1639; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1640; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1641; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
1642; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1643; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1644; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
1645; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1646; GFX10-WGP-NEXT:    buffer_gl1_inv
1647; GFX10-WGP-NEXT:    buffer_gl0_inv
1648; GFX10-WGP-NEXT:    s_endpgm
1649;
1650; GFX10-CU-LABEL: global_agent_acquire_atomicrmw:
1651; GFX10-CU:       ; %bb.0: ; %entry
1652; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1653; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1654; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
1655; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1656; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1657; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
1658; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1659; GFX10-CU-NEXT:    buffer_gl1_inv
1660; GFX10-CU-NEXT:    buffer_gl0_inv
1661; GFX10-CU-NEXT:    s_endpgm
1662;
1663; SKIP-CACHE-INV-LABEL: global_agent_acquire_atomicrmw:
1664; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1665; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1666; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
1667; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1668; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1669; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1670; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1671; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1672; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1673; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1674; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1675; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1676; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1677; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
1678; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
1679; SKIP-CACHE-INV-NEXT:    s_endpgm
1680;
1681; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_atomicrmw:
1682; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1683; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1684; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1685; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1686; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1687; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1688; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1689; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
1690; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
1691; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1692;
1693; GFX90A-TGSPLIT-LABEL: global_agent_acquire_atomicrmw:
1694; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1695; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1696; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1697; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1698; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1699; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1700; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1701; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
1702; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
1703; GFX90A-TGSPLIT-NEXT:    s_endpgm
1704;
1705; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_atomicrmw:
1706; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1707; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1708; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1709; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1710; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1711; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1712; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
1713; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
1714; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
1715; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1716;
1717; GFX940-TGSPLIT-LABEL: global_agent_acquire_atomicrmw:
1718; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1719; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1720; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1721; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1722; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1723; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1724; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
1725; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
1726; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
1727; GFX940-TGSPLIT-NEXT:    s_endpgm
1728;
1729; GFX11-WGP-LABEL: global_agent_acquire_atomicrmw:
1730; GFX11-WGP:       ; %bb.0: ; %entry
1731; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1732; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1733; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1734; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1735; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1736; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1737; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1738; GFX11-WGP-NEXT:    buffer_gl1_inv
1739; GFX11-WGP-NEXT:    buffer_gl0_inv
1740; GFX11-WGP-NEXT:    s_endpgm
1741;
1742; GFX11-CU-LABEL: global_agent_acquire_atomicrmw:
1743; GFX11-CU:       ; %bb.0: ; %entry
1744; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1745; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1746; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1747; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1748; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1749; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1750; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1751; GFX11-CU-NEXT:    buffer_gl1_inv
1752; GFX11-CU-NEXT:    buffer_gl0_inv
1753; GFX11-CU-NEXT:    s_endpgm
1754;
1755; GFX12-WGP-LABEL: global_agent_acquire_atomicrmw:
1756; GFX12-WGP:       ; %bb.0: ; %entry
1757; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1758; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1759; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1760; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1761; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1762; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1763; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
1764; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
1765; GFX12-WGP-NEXT:    s_endpgm
1766;
1767; GFX12-CU-LABEL: global_agent_acquire_atomicrmw:
1768; GFX12-CU:       ; %bb.0: ; %entry
1769; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1770; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1771; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1772; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1773; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1774; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1775; GFX12-CU-NEXT:    s_wait_storecnt 0x0
1776; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
1777; GFX12-CU-NEXT:    s_endpgm
1778    ptr addrspace(1) %out, i32 %in) {
1779entry:
  ; The value returned by the exchange (%val) is unused; the function returns void.
1780  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire
1781  ret void
1782}
1783
; Agent-scope *release* atomic exchange on a global (addrspace(1)) pointer.
; The expected code places the synchronization immediately before the atomic
; swap, with no wait or cache invalidate after it:
;   gfx600, gfx700, gfx90a: s_waitcnt vmcnt(0) lgkmcnt(0)
;   gfx1010, gfx1100:       s_waitcnt vmcnt(0) lgkmcnt(0), then
;                           s_waitcnt_vscnt null, 0x0
;   gfx940:                 buffer_wbl2 sc1, then s_waitcnt vmcnt(0) lgkmcnt(0)
;   gfx1200:                s_wait_bvhcnt, s_wait_samplecnt, s_wait_storecnt
;                           and s_wait_loadcnt_dscnt, each with 0x0
; The run with -amdgcn-skip-cache-invalidations expects the same
; s_waitcnt vmcnt(0) lgkmcnt(0) before its buffer_atomic_swap.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
1784define amdgpu_kernel void @global_agent_release_atomicrmw(
1785; GFX6-LABEL: global_agent_release_atomicrmw:
1786; GFX6:       ; %bb.0: ; %entry
1787; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1788; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
1789; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1790; GFX6-NEXT:    s_mov_b32 s11, s5
1791; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1792; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1793; GFX6-NEXT:    s_mov_b32 s10, -1
1794; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1795; GFX6-NEXT:    s_mov_b32 s5, s11
1796; GFX6-NEXT:    s_mov_b32 s6, s10
1797; GFX6-NEXT:    s_mov_b32 s7, s9
1798; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1799; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1800; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
1801; GFX6-NEXT:    s_endpgm
1802;
1803; GFX7-LABEL: global_agent_release_atomicrmw:
1804; GFX7:       ; %bb.0: ; %entry
1805; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
1806; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
1807; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1808; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1809; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1810; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1811; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1812; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
1813; GFX7-NEXT:    s_endpgm
1814;
1815; GFX10-WGP-LABEL: global_agent_release_atomicrmw:
1816; GFX10-WGP:       ; %bb.0: ; %entry
1817; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1818; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1819; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
1820; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1821; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1822; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1823; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1824; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
1825; GFX10-WGP-NEXT:    s_endpgm
1826;
1827; GFX10-CU-LABEL: global_agent_release_atomicrmw:
1828; GFX10-CU:       ; %bb.0: ; %entry
1829; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1830; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1831; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
1832; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1833; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1834; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1835; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1836; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
1837; GFX10-CU-NEXT:    s_endpgm
1838;
1839; SKIP-CACHE-INV-LABEL: global_agent_release_atomicrmw:
1840; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1841; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1842; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
1843; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1844; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1845; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1846; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1847; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1848; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1849; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1850; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1851; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1852; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1853; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1854; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
1855; SKIP-CACHE-INV-NEXT:    s_endpgm
1856;
1857; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_atomicrmw:
1858; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1859; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1860; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1861; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1862; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1863; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1864; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1865; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1866; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1867;
1868; GFX90A-TGSPLIT-LABEL: global_agent_release_atomicrmw:
1869; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1870; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1871; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1872; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1873; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1874; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1875; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1876; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1877; GFX90A-TGSPLIT-NEXT:    s_endpgm
1878;
1879; GFX940-NOTTGSPLIT-LABEL: global_agent_release_atomicrmw:
1880; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1881; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1882; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1883; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1884; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1885; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1886; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
1887; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1888; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
1889; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1890;
1891; GFX940-TGSPLIT-LABEL: global_agent_release_atomicrmw:
1892; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1893; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1894; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1895; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1896; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1897; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1898; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
1899; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1900; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
1901; GFX940-TGSPLIT-NEXT:    s_endpgm
1902;
1903; GFX11-WGP-LABEL: global_agent_release_atomicrmw:
1904; GFX11-WGP:       ; %bb.0: ; %entry
1905; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1906; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1907; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1908; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1909; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1910; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1911; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1912; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1913; GFX11-WGP-NEXT:    s_endpgm
1914;
1915; GFX11-CU-LABEL: global_agent_release_atomicrmw:
1916; GFX11-CU:       ; %bb.0: ; %entry
1917; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1918; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1919; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1920; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1921; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1922; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1923; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1924; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1925; GFX11-CU-NEXT:    s_endpgm
1926;
1927; GFX12-WGP-LABEL: global_agent_release_atomicrmw:
1928; GFX12-WGP:       ; %bb.0: ; %entry
1929; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1930; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1931; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1932; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1933; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1934; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
1935; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
1936; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
1937; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
1938; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1939; GFX12-WGP-NEXT:    s_endpgm
1940;
1941; GFX12-CU-LABEL: global_agent_release_atomicrmw:
1942; GFX12-CU:       ; %bb.0: ; %entry
1943; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1944; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1945; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1946; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1947; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1948; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
1949; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
1950; GFX12-CU-NEXT:    s_wait_storecnt 0x0
1951; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
1952; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
1953; GFX12-CU-NEXT:    s_endpgm
1954    ptr addrspace(1) %out, i32 %in) {
1955entry:
  ; The value returned by the exchange (%val) is unused; the function returns void.
1956  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") release
1957  ret void
1958}
1959
1960define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
1961; GFX6-LABEL: global_agent_acq_rel_atomicrmw:
1962; GFX6:       ; %bb.0: ; %entry
1963; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1964; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
1965; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1966; GFX6-NEXT:    s_mov_b32 s11, s5
1967; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1968; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1969; GFX6-NEXT:    s_mov_b32 s10, -1
1970; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1971; GFX6-NEXT:    s_mov_b32 s5, s11
1972; GFX6-NEXT:    s_mov_b32 s6, s10
1973; GFX6-NEXT:    s_mov_b32 s7, s9
1974; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1975; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1976; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
1977; GFX6-NEXT:    s_waitcnt vmcnt(0)
1978; GFX6-NEXT:    buffer_wbinvl1
1979; GFX6-NEXT:    s_endpgm
1980;
1981; GFX7-LABEL: global_agent_acq_rel_atomicrmw:
1982; GFX7:       ; %bb.0: ; %entry
1983; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
1984; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
1985; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1986; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1987; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1988; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1989; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1990; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
1991; GFX7-NEXT:    s_waitcnt vmcnt(0)
1992; GFX7-NEXT:    buffer_wbinvl1_vol
1993; GFX7-NEXT:    s_endpgm
1994;
1995; GFX10-WGP-LABEL: global_agent_acq_rel_atomicrmw:
1996; GFX10-WGP:       ; %bb.0: ; %entry
1997; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1998; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1999; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2000; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2001; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2002; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2003; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2004; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
2005; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2006; GFX10-WGP-NEXT:    buffer_gl1_inv
2007; GFX10-WGP-NEXT:    buffer_gl0_inv
2008; GFX10-WGP-NEXT:    s_endpgm
2009;
2010; GFX10-CU-LABEL: global_agent_acq_rel_atomicrmw:
2011; GFX10-CU:       ; %bb.0: ; %entry
2012; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2013; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2014; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2015; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2016; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2017; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2018; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2019; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
2020; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2021; GFX10-CU-NEXT:    buffer_gl1_inv
2022; GFX10-CU-NEXT:    buffer_gl0_inv
2023; GFX10-CU-NEXT:    s_endpgm
2024;
2025; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_atomicrmw:
2026; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2027; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2028; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2029; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2030; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2031; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2032; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2033; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2034; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2035; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2036; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2037; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2038; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2039; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2040; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
2041; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2042; SKIP-CACHE-INV-NEXT:    s_endpgm
2043;
2044; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_atomicrmw:
2045; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2046; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2047; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2048; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2049; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2050; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2051; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2052; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
2053; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2054; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2055; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2056;
2057; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_atomicrmw:
2058; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2059; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2060; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2061; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2062; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2063; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2064; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2065; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
2066; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2067; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2068; GFX90A-TGSPLIT-NEXT:    s_endpgm
2069;
2070; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_atomicrmw:
2071; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2072; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2073; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2074; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2075; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2076; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2077; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
2078; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2079; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
2080; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2081; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
2082; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2083;
2084; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_atomicrmw:
2085; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2086; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2087; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2088; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2089; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2090; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2091; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
2092; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2093; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
2094; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2095; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
2096; GFX940-TGSPLIT-NEXT:    s_endpgm
2097;
2098; GFX11-WGP-LABEL: global_agent_acq_rel_atomicrmw:
2099; GFX11-WGP:       ; %bb.0: ; %entry
2100; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2101; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2102; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2103; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2104; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2105; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2106; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2107; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
2108; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2109; GFX11-WGP-NEXT:    buffer_gl1_inv
2110; GFX11-WGP-NEXT:    buffer_gl0_inv
2111; GFX11-WGP-NEXT:    s_endpgm
2112;
2113; GFX11-CU-LABEL: global_agent_acq_rel_atomicrmw:
2114; GFX11-CU:       ; %bb.0: ; %entry
2115; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2116; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2117; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2118; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2119; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2120; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2121; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2122; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
2123; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2124; GFX11-CU-NEXT:    buffer_gl1_inv
2125; GFX11-CU-NEXT:    buffer_gl0_inv
2126; GFX11-CU-NEXT:    s_endpgm
2127;
2128; GFX12-WGP-LABEL: global_agent_acq_rel_atomicrmw:
2129; GFX12-WGP:       ; %bb.0: ; %entry
2130; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2131; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2132; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2133; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2134; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2135; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2136; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2137; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2138; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
2139; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
2140; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2141; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
2142; GFX12-WGP-NEXT:    s_endpgm
2143;
2144; GFX12-CU-LABEL: global_agent_acq_rel_atomicrmw:
2145; GFX12-CU:       ; %bb.0: ; %entry
2146; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2147; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2148; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2149; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2150; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2151; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2152; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
2153; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2154; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
2155; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
2156; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2157; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
2158; GFX12-CU-NEXT:    s_endpgm
2159    ptr addrspace(1) %out, i32 %in) {
2160entry:
2161  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel
2162  ret void
2163}
2164
; Kernel under test: a non-returning atomicrmw xchg on a global (addrspace(1))
; pointer with syncscope("agent") and seq_cst ordering.
;
; The assertions interleaved below are autogenerated (see the note on the first
; line of this file, utils/update_llc_test_checks.py); regenerate them with that
; script instead of editing them by hand.
;
; What the expected output has in common across the tested configurations:
;   * a wait instruction immediately before the swap, and another one right
;     after it;
;   * after the trailing wait, a cache invalidate (buffer_wbinvl1,
;     buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv, buffer_inv sc1 or
;     global_inv scope:SCOPE_DEV) before s_endpgm, except for the configuration
;     that passes -amdgcn-skip-cache-invalidations, which expects none;
;   * the gfx940 configurations additionally expect buffer_wbl2 sc1 ahead of
;     the leading wait;
;   * the gfx1200 configurations expect scope:SCOPE_DEV on the swap itself.
; NOTE(review): presumably the leading wait covers the release half and the
; trailing wait + invalidate the acquire half of seq_cst -- confirm against the
; AMDGPU memory model documentation.
2165define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
2166; GFX6-LABEL: global_agent_seq_cst_atomicrmw:
2167; GFX6:       ; %bb.0: ; %entry
2168; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2169; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
2170; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2171; GFX6-NEXT:    s_mov_b32 s11, s5
2172; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2173; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
2174; GFX6-NEXT:    s_mov_b32 s10, -1
2175; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
2176; GFX6-NEXT:    s_mov_b32 s5, s11
2177; GFX6-NEXT:    s_mov_b32 s6, s10
2178; GFX6-NEXT:    s_mov_b32 s7, s9
2179; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2180; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2181; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
2182; GFX6-NEXT:    s_waitcnt vmcnt(0)
2183; GFX6-NEXT:    buffer_wbinvl1
2184; GFX6-NEXT:    s_endpgm
2185;
2186; GFX7-LABEL: global_agent_seq_cst_atomicrmw:
2187; GFX7:       ; %bb.0: ; %entry
2188; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
2189; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
2190; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2191; GFX7-NEXT:    v_mov_b32_e32 v0, s6
2192; GFX7-NEXT:    v_mov_b32_e32 v1, s7
2193; GFX7-NEXT:    v_mov_b32_e32 v2, s4
2194; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2195; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
2196; GFX7-NEXT:    s_waitcnt vmcnt(0)
2197; GFX7-NEXT:    buffer_wbinvl1_vol
2198; GFX7-NEXT:    s_endpgm
2199;
2200; GFX10-WGP-LABEL: global_agent_seq_cst_atomicrmw:
2201; GFX10-WGP:       ; %bb.0: ; %entry
2202; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2203; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2204; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2205; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2206; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2207; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2208; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2209; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
2210; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2211; GFX10-WGP-NEXT:    buffer_gl1_inv
2212; GFX10-WGP-NEXT:    buffer_gl0_inv
2213; GFX10-WGP-NEXT:    s_endpgm
2214;
2215; GFX10-CU-LABEL: global_agent_seq_cst_atomicrmw:
2216; GFX10-CU:       ; %bb.0: ; %entry
2217; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2218; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2219; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2220; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2221; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2222; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2223; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2224; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
2225; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2226; GFX10-CU-NEXT:    buffer_gl1_inv
2227; GFX10-CU-NEXT:    buffer_gl0_inv
2228; GFX10-CU-NEXT:    s_endpgm
2229;
2230; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_atomicrmw:
2231; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2232; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2233; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2234; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2235; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2236; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2237; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2238; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2239; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2240; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2241; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2242; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2243; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2244; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2245; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
2246; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2247; SKIP-CACHE-INV-NEXT:    s_endpgm
2248;
2249; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_atomicrmw:
2250; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2251; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2252; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2253; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2254; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2255; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2256; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2257; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
2258; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2259; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2260; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2261;
2262; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_atomicrmw:
2263; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2264; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2265; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2266; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2267; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2268; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2269; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2270; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
2271; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2272; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2273; GFX90A-TGSPLIT-NEXT:    s_endpgm
2274;
2275; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_atomicrmw:
2276; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2277; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2278; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2279; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2280; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2281; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2282; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
2283; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2284; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
2285; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2286; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
2287; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2288;
2289; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_atomicrmw:
2290; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2291; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2292; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2293; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2294; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2295; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2296; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
2297; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2298; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
2299; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2300; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
2301; GFX940-TGSPLIT-NEXT:    s_endpgm
2302;
2303; GFX11-WGP-LABEL: global_agent_seq_cst_atomicrmw:
2304; GFX11-WGP:       ; %bb.0: ; %entry
2305; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2306; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2307; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2308; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2309; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2310; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2311; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2312; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
2313; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2314; GFX11-WGP-NEXT:    buffer_gl1_inv
2315; GFX11-WGP-NEXT:    buffer_gl0_inv
2316; GFX11-WGP-NEXT:    s_endpgm
2317;
2318; GFX11-CU-LABEL: global_agent_seq_cst_atomicrmw:
2319; GFX11-CU:       ; %bb.0: ; %entry
2320; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2321; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2322; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2323; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2324; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2325; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2326; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2327; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
2328; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2329; GFX11-CU-NEXT:    buffer_gl1_inv
2330; GFX11-CU-NEXT:    buffer_gl0_inv
2331; GFX11-CU-NEXT:    s_endpgm
2332;
2333; GFX12-WGP-LABEL: global_agent_seq_cst_atomicrmw:
2334; GFX12-WGP:       ; %bb.0: ; %entry
2335; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2336; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2337; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2338; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2339; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2340; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2341; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2342; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2343; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
2344; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
2345; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2346; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
2347; GFX12-WGP-NEXT:    s_endpgm
2348;
2349; GFX12-CU-LABEL: global_agent_seq_cst_atomicrmw:
2350; GFX12-CU:       ; %bb.0: ; %entry
2351; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2352; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2353; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2354; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2355; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2356; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2357; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
2358; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2359; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
2360; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
2361; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2362; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
2363; GFX12-CU-NEXT:    s_endpgm
2364    ptr addrspace(1) %out, i32 %in) {
2365entry:
  ; %val has no uses in this function; the expected swap instructions above
  ; carry no glc / sc0 / th:TH_ATOMIC_RETURN modifier.
2366  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
2367  ret void
2368}
2369
; Kernel under test: a returning atomicrmw xchg on a global (addrspace(1))
; pointer with syncscope("agent") and acquire ordering; the value it returns
; (%val) is then stored back through %out.
;
; The assertions interleaved below are autogenerated (see the note on the first
; line of this file, utils/update_llc_test_checks.py); regenerate them with that
; script instead of editing them by hand.
;
; What the expected output has in common across the tested configurations:
;   * no wait directly before the swap -- it immediately follows the v_mov that
;     materialises the operand;
;   * the swap carries a return modifier (glc, sc0, or th:TH_ATOMIC_RETURN on
;     gfx1200, which also expects scope:SCOPE_DEV);
;   * the swap is followed by a wait (s_waitcnt vmcnt(0) or s_wait_loadcnt 0x0)
;     and then a cache invalidate (buffer_wbinvl1, buffer_wbinvl1_vol,
;     buffer_gl1_inv + buffer_gl0_inv, buffer_inv sc1 or
;     global_inv scope:SCOPE_DEV), except for the configuration that passes
;     -amdgcn-skip-cache-invalidations, which expects no invalidate;
;   * the store of the returned value comes last, before s_endpgm.
; NOTE(review): presumably the trailing wait + invalidate is what implements
; the acquire -- confirm against the AMDGPU memory model documentation.
2370define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
2371; GFX6-LABEL: global_agent_acquire_ret_atomicrmw:
2372; GFX6:       ; %bb.0: ; %entry
2373; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2374; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
2375; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2376; GFX6-NEXT:    s_mov_b32 s11, s5
2377; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2378; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
2379; GFX6-NEXT:    s_mov_b32 s10, -1
2380; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
2381; GFX6-NEXT:    s_mov_b32 s5, s11
2382; GFX6-NEXT:    s_mov_b32 s6, s10
2383; GFX6-NEXT:    s_mov_b32 s7, s9
2384; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2385; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
2386; GFX6-NEXT:    s_waitcnt vmcnt(0)
2387; GFX6-NEXT:    buffer_wbinvl1
2388; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2389; GFX6-NEXT:    s_endpgm
2390;
2391; GFX7-LABEL: global_agent_acquire_ret_atomicrmw:
2392; GFX7:       ; %bb.0: ; %entry
2393; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2394; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
2395; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2396; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2397; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2398; GFX7-NEXT:    v_mov_b32_e32 v2, s6
2399; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
2400; GFX7-NEXT:    s_waitcnt vmcnt(0)
2401; GFX7-NEXT:    buffer_wbinvl1_vol
2402; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2403; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2404; GFX7-NEXT:    flat_store_dword v[0:1], v2
2405; GFX7-NEXT:    s_endpgm
2406;
2407; GFX10-WGP-LABEL: global_agent_acquire_ret_atomicrmw:
2408; GFX10-WGP:       ; %bb.0: ; %entry
2409; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2410; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2411; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2412; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2413; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2414; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2415; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
2416; GFX10-WGP-NEXT:    buffer_gl1_inv
2417; GFX10-WGP-NEXT:    buffer_gl0_inv
2418; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
2419; GFX10-WGP-NEXT:    s_endpgm
2420;
2421; GFX10-CU-LABEL: global_agent_acquire_ret_atomicrmw:
2422; GFX10-CU:       ; %bb.0: ; %entry
2423; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2424; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2425; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2426; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2427; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2428; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2429; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
2430; GFX10-CU-NEXT:    buffer_gl1_inv
2431; GFX10-CU-NEXT:    buffer_gl0_inv
2432; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
2433; GFX10-CU-NEXT:    s_endpgm
2434;
2435; SKIP-CACHE-INV-LABEL: global_agent_acquire_ret_atomicrmw:
2436; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2437; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2438; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2439; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2440; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2441; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2442; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2443; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2444; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2445; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2446; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2447; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2448; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2449; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
2450; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2451; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2452; SKIP-CACHE-INV-NEXT:    s_endpgm
2453;
2454; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw:
2455; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2456; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2457; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2458; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2459; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2460; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2461; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2462; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2463; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2464; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2465; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2466;
2467; GFX90A-TGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw:
2468; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2469; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2470; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2471; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2472; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2473; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2474; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2475; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2476; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2477; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2478; GFX90A-TGSPLIT-NEXT:    s_endpgm
2479;
2480; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw:
2481; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2482; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2483; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2484; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2485; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2486; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2487; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
2488; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2489; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
2490; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2491; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2492;
2493; GFX940-TGSPLIT-LABEL: global_agent_acquire_ret_atomicrmw:
2494; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2495; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2496; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2497; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2498; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2499; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2500; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
2501; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2502; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
2503; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2504; GFX940-TGSPLIT-NEXT:    s_endpgm
2505;
2506; GFX11-WGP-LABEL: global_agent_acquire_ret_atomicrmw:
2507; GFX11-WGP:       ; %bb.0: ; %entry
2508; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2509; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2510; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2511; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2512; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2513; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2514; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
2515; GFX11-WGP-NEXT:    buffer_gl1_inv
2516; GFX11-WGP-NEXT:    buffer_gl0_inv
2517; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2518; GFX11-WGP-NEXT:    s_endpgm
2519;
2520; GFX11-CU-LABEL: global_agent_acquire_ret_atomicrmw:
2521; GFX11-CU:       ; %bb.0: ; %entry
2522; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2523; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2524; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2525; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2526; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2527; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2528; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
2529; GFX11-CU-NEXT:    buffer_gl1_inv
2530; GFX11-CU-NEXT:    buffer_gl0_inv
2531; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2532; GFX11-CU-NEXT:    s_endpgm
2533;
2534; GFX12-WGP-LABEL: global_agent_acquire_ret_atomicrmw:
2535; GFX12-WGP:       ; %bb.0: ; %entry
2536; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2537; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2538; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2539; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2540; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2541; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2542; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
2543; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
2544; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2545; GFX12-WGP-NEXT:    s_endpgm
2546;
2547; GFX12-CU-LABEL: global_agent_acquire_ret_atomicrmw:
2548; GFX12-CU:       ; %bb.0: ; %entry
2549; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2550; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2551; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2552; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2553; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2554; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2555; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
2556; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
2557; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2558; GFX12-CU-NEXT:    s_endpgm
2559    ptr addrspace(1) %out, i32 %in) {
2560entry:
  ; The store below is the only use of %val; it writes the value returned by
  ; the exchange back through the same pointer.
2561  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acquire
2562  store i32 %val, ptr addrspace(1) %out, align 4
2563  ret void
2564}
2565
2566define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
2567; GFX6-LABEL: global_agent_acq_rel_ret_atomicrmw:
2568; GFX6:       ; %bb.0: ; %entry
2569; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2570; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
2571; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2572; GFX6-NEXT:    s_mov_b32 s11, s5
2573; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2574; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
2575; GFX6-NEXT:    s_mov_b32 s10, -1
2576; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
2577; GFX6-NEXT:    s_mov_b32 s5, s11
2578; GFX6-NEXT:    s_mov_b32 s6, s10
2579; GFX6-NEXT:    s_mov_b32 s7, s9
2580; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2581; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2582; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
2583; GFX6-NEXT:    s_waitcnt vmcnt(0)
2584; GFX6-NEXT:    buffer_wbinvl1
2585; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2586; GFX6-NEXT:    s_endpgm
2587;
2588; GFX7-LABEL: global_agent_acq_rel_ret_atomicrmw:
2589; GFX7:       ; %bb.0: ; %entry
2590; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2591; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
2592; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2593; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2594; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2595; GFX7-NEXT:    v_mov_b32_e32 v2, s6
2596; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2597; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
2598; GFX7-NEXT:    s_waitcnt vmcnt(0)
2599; GFX7-NEXT:    buffer_wbinvl1_vol
2600; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2601; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2602; GFX7-NEXT:    flat_store_dword v[0:1], v2
2603; GFX7-NEXT:    s_endpgm
2604;
2605; GFX10-WGP-LABEL: global_agent_acq_rel_ret_atomicrmw:
2606; GFX10-WGP:       ; %bb.0: ; %entry
2607; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2608; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2609; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2610; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2611; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2612; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2613; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2614; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2615; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
2616; GFX10-WGP-NEXT:    buffer_gl1_inv
2617; GFX10-WGP-NEXT:    buffer_gl0_inv
2618; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
2619; GFX10-WGP-NEXT:    s_endpgm
2620;
2621; GFX10-CU-LABEL: global_agent_acq_rel_ret_atomicrmw:
2622; GFX10-CU:       ; %bb.0: ; %entry
2623; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2624; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2625; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2626; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2627; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2628; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2629; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2630; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2631; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
2632; GFX10-CU-NEXT:    buffer_gl1_inv
2633; GFX10-CU-NEXT:    buffer_gl0_inv
2634; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
2635; GFX10-CU-NEXT:    s_endpgm
2636;
2637; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_ret_atomicrmw:
2638; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2639; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2640; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2641; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2642; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2643; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2644; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2645; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2646; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2647; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2648; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2649; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2650; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2651; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2652; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
2653; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2654; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2655; SKIP-CACHE-INV-NEXT:    s_endpgm
2656;
2657; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw:
2658; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2659; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2660; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2661; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2662; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2663; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2664; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2665; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2666; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2667; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2668; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2669; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2670;
2671; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw:
2672; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2673; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2674; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2675; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2676; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2677; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2678; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2679; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2680; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2681; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2682; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2683; GFX90A-TGSPLIT-NEXT:    s_endpgm
2684;
2685; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw:
2686; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2687; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2688; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2689; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2690; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2691; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2692; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
2693; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2694; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
2695; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2696; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
2697; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2698; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2699;
2700; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_ret_atomicrmw:
2701; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2702; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2703; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2704; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2705; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2706; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2707; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
2708; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2709; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
2710; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2711; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
2712; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2713; GFX940-TGSPLIT-NEXT:    s_endpgm
2714;
2715; GFX11-WGP-LABEL: global_agent_acq_rel_ret_atomicrmw:
2716; GFX11-WGP:       ; %bb.0: ; %entry
2717; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2718; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2719; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2720; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2721; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2722; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2723; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2724; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2725; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
2726; GFX11-WGP-NEXT:    buffer_gl1_inv
2727; GFX11-WGP-NEXT:    buffer_gl0_inv
2728; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2729; GFX11-WGP-NEXT:    s_endpgm
2730;
2731; GFX11-CU-LABEL: global_agent_acq_rel_ret_atomicrmw:
2732; GFX11-CU:       ; %bb.0: ; %entry
2733; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2734; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2735; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2736; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2737; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2738; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2739; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2740; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2741; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
2742; GFX11-CU-NEXT:    buffer_gl1_inv
2743; GFX11-CU-NEXT:    buffer_gl0_inv
2744; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2745; GFX11-CU-NEXT:    s_endpgm
2746;
2747; GFX12-WGP-LABEL: global_agent_acq_rel_ret_atomicrmw:
2748; GFX12-WGP:       ; %bb.0: ; %entry
2749; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2750; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2751; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2752; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2753; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2754; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2755; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2756; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2757; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
2758; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2759; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2760; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2761; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
2762; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
2763; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2764; GFX12-WGP-NEXT:    s_endpgm
2765;
2766; GFX12-CU-LABEL: global_agent_acq_rel_ret_atomicrmw:
2767; GFX12-CU:       ; %bb.0: ; %entry
2768; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2769; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2770; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2771; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2772; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2773; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2774; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
2775; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2776; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
2777; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2778; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2779; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
2780; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
2781; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
2782; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2783; GFX12-CU-NEXT:    s_endpgm
2784    ptr addrspace(1) %out, i32 %in) {
2785entry:
2786  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") acq_rel
2787  store i32 %val, ptr addrspace(1) %out, align 4
2788  ret void
2789}
2790
; Codegen test: volatile `atomicrmw xchg` on a global (addrspace(1)) pointer at
; syncscope("agent") with seq_cst ordering, where the returned value is used
; (it is stored back through %out).
; The assertions below are autogenerated (see the NOTE at the top of the file)
; and give the expected llc -O0 output for each RUN configuration. In every
; configuration the returning swap is preceded by s_waitcnt / s_wait_*
; instructions and followed by a wait for its result. All configurations except
; the one run with -amdgcn-skip-cache-invalidations also expect an invalidate
; instruction (buffer_wbinvl1*, buffer_gl*_inv, buffer_inv or global_inv)
; between that wait and the final store.
2791define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
2792; GFX6-LABEL: global_agent_seq_cst_ret_atomicrmw:
2793; GFX6:       ; %bb.0: ; %entry
2794; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2795; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
2796; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2797; GFX6-NEXT:    s_mov_b32 s11, s5
2798; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2799; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
2800; GFX6-NEXT:    s_mov_b32 s10, -1
2801; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
2802; GFX6-NEXT:    s_mov_b32 s5, s11
2803; GFX6-NEXT:    s_mov_b32 s6, s10
2804; GFX6-NEXT:    s_mov_b32 s7, s9
2805; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2806; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2807; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
2808; GFX6-NEXT:    s_waitcnt vmcnt(0)
2809; GFX6-NEXT:    buffer_wbinvl1
2810; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2811; GFX6-NEXT:    s_endpgm
2812;
2813; GFX7-LABEL: global_agent_seq_cst_ret_atomicrmw:
2814; GFX7:       ; %bb.0: ; %entry
2815; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2816; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
2817; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2818; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2819; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2820; GFX7-NEXT:    v_mov_b32_e32 v2, s6
2821; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2822; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
2823; GFX7-NEXT:    s_waitcnt vmcnt(0)
2824; GFX7-NEXT:    buffer_wbinvl1_vol
2825; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2826; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2827; GFX7-NEXT:    flat_store_dword v[0:1], v2
2828; GFX7-NEXT:    s_endpgm
2829;
2830; GFX10-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw:
2831; GFX10-WGP:       ; %bb.0: ; %entry
2832; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2833; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2834; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2835; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2836; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2837; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2838; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2839; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2840; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
2841; GFX10-WGP-NEXT:    buffer_gl1_inv
2842; GFX10-WGP-NEXT:    buffer_gl0_inv
2843; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
2844; GFX10-WGP-NEXT:    s_endpgm
2845;
2846; GFX10-CU-LABEL: global_agent_seq_cst_ret_atomicrmw:
2847; GFX10-CU:       ; %bb.0: ; %entry
2848; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2849; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2850; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2851; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2852; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2853; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2854; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2855; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2856; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
2857; GFX10-CU-NEXT:    buffer_gl1_inv
2858; GFX10-CU-NEXT:    buffer_gl0_inv
2859; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
2860; GFX10-CU-NEXT:    s_endpgm
2861;
2862; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_ret_atomicrmw:
2863; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2864; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2865; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2866; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2867; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2868; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2869; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2870; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2871; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2872; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2873; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2874; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2875; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2876; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2877; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
2878; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2879; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2880; SKIP-CACHE-INV-NEXT:    s_endpgm
2881;
2882; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw:
2883; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2884; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2885; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2886; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2887; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2888; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2889; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2890; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2891; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2892; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2893; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2894; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2895;
2896; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw:
2897; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2898; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2899; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2900; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2901; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2902; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2903; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2904; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2905; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2906; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2907; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2908; GFX90A-TGSPLIT-NEXT:    s_endpgm
2909;
2910; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw:
2911; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2912; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2913; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2914; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2915; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2916; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2917; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
2918; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2919; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
2920; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2921; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
2922; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2923; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2924;
2925; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_ret_atomicrmw:
2926; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2927; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2928; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2929; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2930; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2931; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2932; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
2933; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2934; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
2935; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2936; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
2937; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2938; GFX940-TGSPLIT-NEXT:    s_endpgm
2939;
2940; GFX11-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw:
2941; GFX11-WGP:       ; %bb.0: ; %entry
2942; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2943; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2944; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2945; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2946; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2947; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2948; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2949; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2950; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
2951; GFX11-WGP-NEXT:    buffer_gl1_inv
2952; GFX11-WGP-NEXT:    buffer_gl0_inv
2953; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2954; GFX11-WGP-NEXT:    s_endpgm
2955;
2956; GFX11-CU-LABEL: global_agent_seq_cst_ret_atomicrmw:
2957; GFX11-CU:       ; %bb.0: ; %entry
2958; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2959; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2960; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2961; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2962; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2963; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2964; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2965; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2966; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
2967; GFX11-CU-NEXT:    buffer_gl1_inv
2968; GFX11-CU-NEXT:    buffer_gl0_inv
2969; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2970; GFX11-CU-NEXT:    s_endpgm
2971;
2972; GFX12-WGP-LABEL: global_agent_seq_cst_ret_atomicrmw:
2973; GFX12-WGP:       ; %bb.0: ; %entry
2974; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2975; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2976; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2977; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2978; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2979; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2980; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2981; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2982; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
2983; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
2984; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2985; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2986; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
2987; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
2988; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2989; GFX12-WGP-NEXT:    s_endpgm
2990;
2991; GFX12-CU-LABEL: global_agent_seq_cst_ret_atomicrmw:
2992; GFX12-CU:       ; %bb.0: ; %entry
2993; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2994; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2995; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2996; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2997; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2998; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2999; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
3000; GFX12-CU-NEXT:    s_wait_storecnt 0x0
3001; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
3002; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
3003; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
3004; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
3005; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
3006; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
3007; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
3008; GFX12-CU-NEXT:    s_endpgm
3009    ptr addrspace(1) %out, i32 %in) {
3010entry:
  ; Exchange %in into the location at %out; %val receives the value that was
  ; held there before the exchange.
3011  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent") seq_cst
  ; Store the result so the return value of the atomic is live.
3012  store i32 %val, ptr addrspace(1) %out, align 4
3013  ret void
3014}
3015
; Codegen test: volatile `cmpxchg` at syncscope("agent") with monotonic
; success and failure orderings, applied to the i32 at index 4 from %out.
; In the expected output this appears as offset:16 on the atomic, or as an
; explicit 64-bit add of 16 in the gfx700 amdhsa output. The cmpxchg result
; is not used.
; In the autogenerated assertions below, every configuration expects the
; compare-swap to be followed directly by s_endpgm, with no wait or
; invalidate instruction after it.
3016define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg(
3017; GFX6-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3018; GFX6:       ; %bb.0: ; %entry
3019; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3020; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3021; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3022; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3023; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3024; GFX6-NEXT:    s_mov_b32 s12, s5
3025; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3026; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3027; GFX6-NEXT:    s_mov_b32 s11, -1
3028; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
3029; GFX6-NEXT:    s_mov_b32 s5, s12
3030; GFX6-NEXT:    s_mov_b32 s6, s11
3031; GFX6-NEXT:    s_mov_b32 s7, s10
3032; GFX6-NEXT:    v_mov_b32_e32 v0, s9
3033; GFX6-NEXT:    v_mov_b32_e32 v2, s8
3034; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3035; GFX6-NEXT:    v_mov_b32_e32 v1, v2
3036; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3037; GFX6-NEXT:    s_endpgm
3038;
3039; GFX7-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3040; GFX7:       ; %bb.0: ; %entry
3041; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
3042; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3043; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
3044; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
3045; GFX7-NEXT:    s_mov_b64 s[10:11], 16
3046; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
3047; GFX7-NEXT:    s_mov_b32 s4, s8
3048; GFX7-NEXT:    s_mov_b32 s5, s9
3049; GFX7-NEXT:    s_mov_b32 s9, s10
3050; GFX7-NEXT:    s_mov_b32 s8, s11
3051; GFX7-NEXT:    s_add_u32 s4, s4, s9
3052; GFX7-NEXT:    s_addc_u32 s8, s5, s8
3053; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
3054; GFX7-NEXT:    s_mov_b32 s5, s8
3055; GFX7-NEXT:    v_mov_b32_e32 v2, s7
3056; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3057; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3058; GFX7-NEXT:    v_mov_b32_e32 v3, v0
3059; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3060; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3061; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
3062; GFX7-NEXT:    s_endpgm
3063;
3064; GFX10-WGP-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3065; GFX10-WGP:       ; %bb.0: ; %entry
3066; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
3067; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3068; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
3069; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
3070; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3071; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
3072; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
3073; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3074; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
3075; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3076; GFX10-WGP-NEXT:    s_endpgm
3077;
3078; GFX10-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3079; GFX10-CU:       ; %bb.0: ; %entry
3080; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
3081; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3082; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
3083; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
3084; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
3085; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
3086; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
3087; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3088; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
3089; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3090; GFX10-CU-NEXT:    s_endpgm
3091;
3092; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3093; SKIP-CACHE-INV:       ; %bb.0: ; %entry
3094; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
3095; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
3096; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
3097; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
3098; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
3099; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
3100; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
3101; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
3102; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
3103; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
3104; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
3105; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
3106; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
3107; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
3108; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
3109; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3110; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
3111; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3112; SKIP-CACHE-INV-NEXT:    s_endpgm
3113;
3114; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3115; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
3116; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3117; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3118; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3119; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3120; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3121; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3122; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3123; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3124; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3125; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3126; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
3127;
3128; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3129; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
3130; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3131; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3132; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3133; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3134; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3135; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3136; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3137; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3138; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3139; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3140; GFX90A-TGSPLIT-NEXT:    s_endpgm
3141;
3142; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3143; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
3144; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3145; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3146; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3147; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3148; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3149; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3150; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3151; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3152; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3153; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3154; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
3155;
3156; GFX940-TGSPLIT-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3157; GFX940-TGSPLIT:       ; %bb.0: ; %entry
3158; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3159; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3160; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3161; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3162; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3163; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3164; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3165; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3166; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3167; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3168; GFX940-TGSPLIT-NEXT:    s_endpgm
3169;
3170; GFX11-WGP-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3171; GFX11-WGP:       ; %bb.0: ; %entry
3172; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
3173; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3174; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3175; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3176; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3177; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
3178; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
3179; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3180; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
3181; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3182; GFX11-WGP-NEXT:    s_endpgm
3183;
3184; GFX11-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3185; GFX11-CU:       ; %bb.0: ; %entry
3186; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3187; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3188; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3189; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3190; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3191; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
3192; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
3193; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3194; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
3195; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3196; GFX11-CU-NEXT:    s_endpgm
3197;
3198; GFX12-WGP-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3199; GFX12-WGP:       ; %bb.0: ; %entry
3200; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3201; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3202; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3203; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3204; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
3205; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
3206; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
3207; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3208; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
3209; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
3210; GFX12-WGP-NEXT:    s_endpgm
3211;
3212; GFX12-CU-LABEL: global_agent_monotonic_monotonic_cmpxchg:
3213; GFX12-CU:       ; %bb.0: ; %entry
3214; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
3215; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3216; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3217; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3218; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
3219; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
3220; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
3221; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3222; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
3223; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
3224; GFX12-CU-NEXT:    s_endpgm
3225    ptr addrspace(1) %out, i32 %in, i32 %old) {
3226entry:
  ; Address of the i32 element at index 4 from %out.
3227  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the value at %gep with %old and, on a match, replace it with %in.
  ; %val is not used afterwards.
3228  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
3229  ret void
3230}
3231
3232define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
3233; GFX6-LABEL: global_agent_acquire_monotonic_cmpxchg:
3234; GFX6:       ; %bb.0: ; %entry
3235; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3236; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3237; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3238; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3239; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3240; GFX6-NEXT:    s_mov_b32 s12, s5
3241; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3242; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3243; GFX6-NEXT:    s_mov_b32 s11, -1
3244; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
3245; GFX6-NEXT:    s_mov_b32 s5, s12
3246; GFX6-NEXT:    s_mov_b32 s6, s11
3247; GFX6-NEXT:    s_mov_b32 s7, s10
3248; GFX6-NEXT:    v_mov_b32_e32 v0, s9
3249; GFX6-NEXT:    v_mov_b32_e32 v2, s8
3250; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3251; GFX6-NEXT:    v_mov_b32_e32 v1, v2
3252; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3253; GFX6-NEXT:    s_waitcnt vmcnt(0)
3254; GFX6-NEXT:    buffer_wbinvl1
3255; GFX6-NEXT:    s_endpgm
3256;
3257; GFX7-LABEL: global_agent_acquire_monotonic_cmpxchg:
3258; GFX7:       ; %bb.0: ; %entry
3259; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
3260; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3261; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
3262; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
3263; GFX7-NEXT:    s_mov_b64 s[10:11], 16
3264; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
3265; GFX7-NEXT:    s_mov_b32 s4, s8
3266; GFX7-NEXT:    s_mov_b32 s5, s9
3267; GFX7-NEXT:    s_mov_b32 s9, s10
3268; GFX7-NEXT:    s_mov_b32 s8, s11
3269; GFX7-NEXT:    s_add_u32 s4, s4, s9
3270; GFX7-NEXT:    s_addc_u32 s8, s5, s8
3271; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
3272; GFX7-NEXT:    s_mov_b32 s5, s8
3273; GFX7-NEXT:    v_mov_b32_e32 v2, s7
3274; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3275; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3276; GFX7-NEXT:    v_mov_b32_e32 v3, v0
3277; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3278; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3279; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
3280; GFX7-NEXT:    s_waitcnt vmcnt(0)
3281; GFX7-NEXT:    buffer_wbinvl1_vol
3282; GFX7-NEXT:    s_endpgm
3283;
3284; GFX10-WGP-LABEL: global_agent_acquire_monotonic_cmpxchg:
3285; GFX10-WGP:       ; %bb.0: ; %entry
3286; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
3287; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3288; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
3289; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
3290; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3291; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
3292; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
3293; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3294; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
3295; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3296; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3297; GFX10-WGP-NEXT:    buffer_gl1_inv
3298; GFX10-WGP-NEXT:    buffer_gl0_inv
3299; GFX10-WGP-NEXT:    s_endpgm
3300;
3301; GFX10-CU-LABEL: global_agent_acquire_monotonic_cmpxchg:
3302; GFX10-CU:       ; %bb.0: ; %entry
3303; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
3304; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3305; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
3306; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
3307; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
3308; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
3309; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
3310; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3311; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
3312; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3313; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3314; GFX10-CU-NEXT:    buffer_gl1_inv
3315; GFX10-CU-NEXT:    buffer_gl0_inv
3316; GFX10-CU-NEXT:    s_endpgm
3317;
3318; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_cmpxchg:
3319; SKIP-CACHE-INV:       ; %bb.0: ; %entry
3320; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
3321; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
3322; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
3323; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
3324; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
3325; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
3326; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
3327; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
3328; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
3329; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
3330; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
3331; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
3332; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
3333; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
3334; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
3335; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3336; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
3337; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3338; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
3339; SKIP-CACHE-INV-NEXT:    s_endpgm
3340;
3341; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg:
3342; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
3343; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3344; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3345; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3346; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3347; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3348; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3349; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3350; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3351; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3352; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3353; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3354; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
3355; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
3356;
3357; GFX90A-TGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg:
3358; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
3359; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3360; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3361; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3362; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3363; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3364; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3365; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3366; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3367; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3368; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3369; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3370; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
3371; GFX90A-TGSPLIT-NEXT:    s_endpgm
3372;
3373; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg:
3374; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
3375; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3376; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3377; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3378; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3379; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3380; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3381; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3382; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3383; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3384; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3385; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3386; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
3387; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
3388;
3389; GFX940-TGSPLIT-LABEL: global_agent_acquire_monotonic_cmpxchg:
3390; GFX940-TGSPLIT:       ; %bb.0: ; %entry
3391; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3392; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3393; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3394; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3395; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3396; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3397; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3398; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3399; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3400; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3401; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3402; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
3403; GFX940-TGSPLIT-NEXT:    s_endpgm
3404;
3405; GFX11-WGP-LABEL: global_agent_acquire_monotonic_cmpxchg:
3406; GFX11-WGP:       ; %bb.0: ; %entry
3407; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
3408; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3409; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3410; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3411; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3412; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
3413; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
3414; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3415; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
3416; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3417; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3418; GFX11-WGP-NEXT:    buffer_gl1_inv
3419; GFX11-WGP-NEXT:    buffer_gl0_inv
3420; GFX11-WGP-NEXT:    s_endpgm
3421;
3422; GFX11-CU-LABEL: global_agent_acquire_monotonic_cmpxchg:
3423; GFX11-CU:       ; %bb.0: ; %entry
3424; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3425; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3426; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3427; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3428; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3429; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
3430; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
3431; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3432; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
3433; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3434; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3435; GFX11-CU-NEXT:    buffer_gl1_inv
3436; GFX11-CU-NEXT:    buffer_gl0_inv
3437; GFX11-CU-NEXT:    s_endpgm
3438;
3439; GFX12-WGP-LABEL: global_agent_acquire_monotonic_cmpxchg:
3440; GFX12-WGP:       ; %bb.0: ; %entry
3441; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3442; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3443; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3444; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3445; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
3446; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
3447; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
3448; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3449; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
3450; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
3451; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
3452; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
3453; GFX12-WGP-NEXT:    s_endpgm
3454;
3455; GFX12-CU-LABEL: global_agent_acquire_monotonic_cmpxchg:
3456; GFX12-CU:       ; %bb.0: ; %entry
3457; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
3458; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3459; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3460; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3461; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
3462; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
3463; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
3464; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3465; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
3466; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
3467; GFX12-CU-NEXT:    s_wait_storecnt 0x0
3468; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
3469; GFX12-CU-NEXT:    s_endpgm
3470    ptr addrspace(1) %out, i32 %in, i32 %old) {
3471entry:
3472  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
3473  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
3474  ret void
3475}
3476
; Codegen test for an agent-scope cmpxchg on global memory with `release`
; success ordering and `monotonic` failure ordering. The expected sequences
; below place a wait (and, in the gfx940 runs, a `buffer_wbl2 sc1`) before the
; atomic instruction, and nothing but `s_endpgm` after it.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them rather than editing them by hand.
3477define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg(
3478; GFX6-LABEL: global_agent_release_monotonic_cmpxchg:
3479; GFX6:       ; %bb.0: ; %entry
3480; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3481; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3482; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3483; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3484; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3485; GFX6-NEXT:    s_mov_b32 s12, s5
3486; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3487; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3488; GFX6-NEXT:    s_mov_b32 s11, -1
3489; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
3490; GFX6-NEXT:    s_mov_b32 s5, s12
3491; GFX6-NEXT:    s_mov_b32 s6, s11
3492; GFX6-NEXT:    s_mov_b32 s7, s10
3493; GFX6-NEXT:    v_mov_b32_e32 v0, s9
3494; GFX6-NEXT:    v_mov_b32_e32 v2, s8
3495; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3496; GFX6-NEXT:    v_mov_b32_e32 v1, v2
3497; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3498; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3499; GFX6-NEXT:    s_endpgm
3500;
3501; GFX7-LABEL: global_agent_release_monotonic_cmpxchg:
3502; GFX7:       ; %bb.0: ; %entry
3503; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
3504; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3505; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
3506; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
3507; GFX7-NEXT:    s_mov_b64 s[10:11], 16
3508; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
3509; GFX7-NEXT:    s_mov_b32 s4, s8
3510; GFX7-NEXT:    s_mov_b32 s5, s9
3511; GFX7-NEXT:    s_mov_b32 s9, s10
3512; GFX7-NEXT:    s_mov_b32 s8, s11
3513; GFX7-NEXT:    s_add_u32 s4, s4, s9
3514; GFX7-NEXT:    s_addc_u32 s8, s5, s8
3515; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
3516; GFX7-NEXT:    s_mov_b32 s5, s8
3517; GFX7-NEXT:    v_mov_b32_e32 v2, s7
3518; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3519; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3520; GFX7-NEXT:    v_mov_b32_e32 v3, v0
3521; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3522; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3523; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3524; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
3525; GFX7-NEXT:    s_endpgm
3526;
3527; GFX10-WGP-LABEL: global_agent_release_monotonic_cmpxchg:
3528; GFX10-WGP:       ; %bb.0: ; %entry
3529; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
3530; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3531; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
3532; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
3533; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3534; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
3535; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
3536; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3537; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
3538; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3539; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3540; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3541; GFX10-WGP-NEXT:    s_endpgm
3542;
3543; GFX10-CU-LABEL: global_agent_release_monotonic_cmpxchg:
3544; GFX10-CU:       ; %bb.0: ; %entry
3545; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
3546; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3547; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
3548; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
3549; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
3550; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
3551; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
3552; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3553; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
3554; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3555; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3556; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3557; GFX10-CU-NEXT:    s_endpgm
3558;
3559; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_cmpxchg:
3560; SKIP-CACHE-INV:       ; %bb.0: ; %entry
3561; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
3562; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
3563; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
3564; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
3565; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
3566; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
3567; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
3568; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
3569; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
3570; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
3571; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
3572; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
3573; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
3574; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
3575; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
3576; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3577; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
3578; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3579; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3580; SKIP-CACHE-INV-NEXT:    s_endpgm
3581;
3582; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg:
3583; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
3584; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3585; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3586; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3587; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3588; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3589; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3590; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3591; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3592; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3593; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3594; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3595; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
3596;
3597; GFX90A-TGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg:
3598; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
3599; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3600; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3601; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3602; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3603; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3604; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3605; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3606; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3607; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3608; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3609; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3610; GFX90A-TGSPLIT-NEXT:    s_endpgm
3611;
3612; GFX940-NOTTGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg:
3613; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
3614; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3615; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3616; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3617; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3618; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3619; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3620; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3621; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3622; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3623; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
3624; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3625; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3626; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
3627;
3628; GFX940-TGSPLIT-LABEL: global_agent_release_monotonic_cmpxchg:
3629; GFX940-TGSPLIT:       ; %bb.0: ; %entry
3630; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3631; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3632; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3633; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3634; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3635; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3636; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3637; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3638; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3639; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
3640; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3641; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3642; GFX940-TGSPLIT-NEXT:    s_endpgm
3643;
3644; GFX11-WGP-LABEL: global_agent_release_monotonic_cmpxchg:
3645; GFX11-WGP:       ; %bb.0: ; %entry
3646; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
3647; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3648; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3649; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3650; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3651; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
3652; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
3653; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3654; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
3655; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3656; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3657; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3658; GFX11-WGP-NEXT:    s_endpgm
3659;
3660; GFX11-CU-LABEL: global_agent_release_monotonic_cmpxchg:
3661; GFX11-CU:       ; %bb.0: ; %entry
3662; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3663; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3664; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3665; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3666; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3667; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
3668; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
3669; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3670; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
3671; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3672; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3673; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3674; GFX11-CU-NEXT:    s_endpgm
3675;
3676; GFX12-WGP-LABEL: global_agent_release_monotonic_cmpxchg:
3677; GFX12-WGP:       ; %bb.0: ; %entry
3678; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3679; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3680; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3681; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3682; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
3683; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
3684; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
3685; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3686; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
3687; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
3688; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
3689; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
3690; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
3691; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
3692; GFX12-WGP-NEXT:    s_endpgm
3693;
3694; GFX12-CU-LABEL: global_agent_release_monotonic_cmpxchg:
3695; GFX12-CU:       ; %bb.0: ; %entry
3696; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
3697; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3698; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3699; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3700; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
3701; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
3702; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
3703; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3704; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
3705; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
3706; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
3707; GFX12-CU-NEXT:    s_wait_storecnt 0x0
3708; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
3709; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
3710; GFX12-CU-NEXT:    s_endpgm
3711    ptr addrspace(1) %out, i32 %in, i32 %old) {
3712entry:
  ; Address of the i32 at index 4 past %out, i.e. byte offset 16.
3713  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Volatile compare-and-swap at %gep: expected value %old, replacement %in.
  ; The result %val is not used.
3714  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release monotonic
3715  ret void
3716}
3717
; Codegen test for an agent-scope cmpxchg on global memory with `acq_rel`
; success ordering and `monotonic` failure ordering. The expected sequences
; below wait before the atomic instruction (the gfx940 runs also emit
; `buffer_wbl2 sc1` first), then wait again after it and emit the target's
; invalidate instruction(s) (`buffer_wbinvl1`, `buffer_wbinvl1_vol`,
; `buffer_gl1_inv` + `buffer_gl0_inv`, `buffer_inv sc1` or `global_inv`).
; The run that passes -amdgcn-skip-cache-invalidations expects the trailing
; wait but no invalidate instruction.
; The check lines are autogenerated (see the NOTE at the top of the file), so
; regenerate them rather than editing them by hand.
3718define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
3719; GFX6-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3720; GFX6:       ; %bb.0: ; %entry
3721; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3722; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3723; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3724; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3725; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3726; GFX6-NEXT:    s_mov_b32 s12, s5
3727; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3728; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3729; GFX6-NEXT:    s_mov_b32 s11, -1
3730; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
3731; GFX6-NEXT:    s_mov_b32 s5, s12
3732; GFX6-NEXT:    s_mov_b32 s6, s11
3733; GFX6-NEXT:    s_mov_b32 s7, s10
3734; GFX6-NEXT:    v_mov_b32_e32 v0, s9
3735; GFX6-NEXT:    v_mov_b32_e32 v2, s8
3736; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3737; GFX6-NEXT:    v_mov_b32_e32 v1, v2
3738; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3739; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3740; GFX6-NEXT:    s_waitcnt vmcnt(0)
3741; GFX6-NEXT:    buffer_wbinvl1
3742; GFX6-NEXT:    s_endpgm
3743;
3744; GFX7-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3745; GFX7:       ; %bb.0: ; %entry
3746; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
3747; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3748; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
3749; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
3750; GFX7-NEXT:    s_mov_b64 s[10:11], 16
3751; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
3752; GFX7-NEXT:    s_mov_b32 s4, s8
3753; GFX7-NEXT:    s_mov_b32 s5, s9
3754; GFX7-NEXT:    s_mov_b32 s9, s10
3755; GFX7-NEXT:    s_mov_b32 s8, s11
3756; GFX7-NEXT:    s_add_u32 s4, s4, s9
3757; GFX7-NEXT:    s_addc_u32 s8, s5, s8
3758; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
3759; GFX7-NEXT:    s_mov_b32 s5, s8
3760; GFX7-NEXT:    v_mov_b32_e32 v2, s7
3761; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3762; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3763; GFX7-NEXT:    v_mov_b32_e32 v3, v0
3764; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3765; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3766; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3767; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
3768; GFX7-NEXT:    s_waitcnt vmcnt(0)
3769; GFX7-NEXT:    buffer_wbinvl1_vol
3770; GFX7-NEXT:    s_endpgm
3771;
3772; GFX10-WGP-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3773; GFX10-WGP:       ; %bb.0: ; %entry
3774; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
3775; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3776; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
3777; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
3778; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3779; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
3780; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
3781; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3782; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
3783; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3784; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3785; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3786; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3787; GFX10-WGP-NEXT:    buffer_gl1_inv
3788; GFX10-WGP-NEXT:    buffer_gl0_inv
3789; GFX10-WGP-NEXT:    s_endpgm
3790;
3791; GFX10-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3792; GFX10-CU:       ; %bb.0: ; %entry
3793; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
3794; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3795; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
3796; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
3797; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
3798; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
3799; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
3800; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3801; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
3802; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3803; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3804; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3805; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3806; GFX10-CU-NEXT:    buffer_gl1_inv
3807; GFX10-CU-NEXT:    buffer_gl0_inv
3808; GFX10-CU-NEXT:    s_endpgm
3809;
3810; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3811; SKIP-CACHE-INV:       ; %bb.0: ; %entry
3812; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
3813; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
3814; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
3815; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
3816; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
3817; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
3818; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
3819; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
3820; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
3821; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
3822; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
3823; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
3824; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
3825; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
3826; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
3827; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3828; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
3829; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3830; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3831; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
3832; SKIP-CACHE-INV-NEXT:    s_endpgm
3833;
3834; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3835; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
3836; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3837; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3838; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3839; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3840; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3841; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3842; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3843; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3844; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3845; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3846; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3847; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3848; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
3849; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
3850;
3851; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3852; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
3853; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3854; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3855; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3856; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3857; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3858; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3859; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3860; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3861; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3862; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3863; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3864; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3865; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
3866; GFX90A-TGSPLIT-NEXT:    s_endpgm
3867;
3868; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3869; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
3870; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3871; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3872; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3873; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3874; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3875; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3876; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3877; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3878; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3879; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
3880; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3881; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3882; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3883; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
3884; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
3885;
3886; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3887; GFX940-TGSPLIT:       ; %bb.0: ; %entry
3888; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3889; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3890; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3891; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3892; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3893; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3894; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3895; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3896; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3897; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
3898; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3899; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
3900; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3901; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
3902; GFX940-TGSPLIT-NEXT:    s_endpgm
3903;
3904; GFX11-WGP-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3905; GFX11-WGP:       ; %bb.0: ; %entry
3906; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
3907; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3908; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3909; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3910; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3911; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
3912; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
3913; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3914; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
3915; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3916; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3917; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3918; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3919; GFX11-WGP-NEXT:    buffer_gl1_inv
3920; GFX11-WGP-NEXT:    buffer_gl0_inv
3921; GFX11-WGP-NEXT:    s_endpgm
3922;
3923; GFX11-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3924; GFX11-CU:       ; %bb.0: ; %entry
3925; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3926; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3927; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3928; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3929; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3930; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
3931; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
3932; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3933; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
3934; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3935; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3936; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3937; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3938; GFX11-CU-NEXT:    buffer_gl1_inv
3939; GFX11-CU-NEXT:    buffer_gl0_inv
3940; GFX11-CU-NEXT:    s_endpgm
3941;
3942; GFX12-WGP-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3943; GFX12-WGP:       ; %bb.0: ; %entry
3944; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3945; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3946; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3947; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3948; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
3949; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
3950; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
3951; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3952; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
3953; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
3954; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
3955; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
3956; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
3957; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
3958; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
3959; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
3960; GFX12-WGP-NEXT:    s_endpgm
3961;
3962; GFX12-CU-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
3963; GFX12-CU:       ; %bb.0: ; %entry
3964; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
3965; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3966; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3967; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3968; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
3969; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
3970; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
3971; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3972; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
3973; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
3974; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
3975; GFX12-CU-NEXT:    s_wait_storecnt 0x0
3976; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
3977; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
3978; GFX12-CU-NEXT:    s_wait_storecnt 0x0
3979; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
3980; GFX12-CU-NEXT:    s_endpgm
3981    ptr addrspace(1) %out, i32 %in, i32 %old) {
3982entry:
  ; Address of the i32 at index 4 past %out, i.e. byte offset 16.
3983  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Volatile compare-and-swap at %gep: expected value %old, replacement %in.
  ; The result %val is not used.
3984  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
3985  ret void
3986}
3987
; Agent-scope cmpxchg on global memory (addrspace(1)) with seq_cst success and
; monotonic failure ordering. In the expected output of every run the
; compare-swap is preceded by waits on outstanding memory counters (the gfx940
; runs additionally expect "buffer_wbl2 sc1" ahead of that wait) and is followed
; by another wait. Every run except the SKIP-CACHE-INV one then also expects a
; cache invalidate before s_endpgm.
3988define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
3989; GFX6-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
3990; GFX6:       ; %bb.0: ; %entry
3991; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3992; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3993; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3994; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3995; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3996; GFX6-NEXT:    s_mov_b32 s12, s5
3997; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3998; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3999; GFX6-NEXT:    s_mov_b32 s11, -1
4000; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
4001; GFX6-NEXT:    s_mov_b32 s5, s12
4002; GFX6-NEXT:    s_mov_b32 s6, s11
4003; GFX6-NEXT:    s_mov_b32 s7, s10
4004; GFX6-NEXT:    v_mov_b32_e32 v0, s9
4005; GFX6-NEXT:    v_mov_b32_e32 v2, s8
4006; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4007; GFX6-NEXT:    v_mov_b32_e32 v1, v2
4008; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4009; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4010; GFX6-NEXT:    s_waitcnt vmcnt(0)
4011; GFX6-NEXT:    buffer_wbinvl1
4012; GFX6-NEXT:    s_endpgm
4013;
4014; GFX7-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4015; GFX7:       ; %bb.0: ; %entry
4016; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
4017; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4018; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
4019; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
4020; GFX7-NEXT:    s_mov_b64 s[10:11], 16
4021; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4022; GFX7-NEXT:    s_mov_b32 s4, s8
4023; GFX7-NEXT:    s_mov_b32 s5, s9
4024; GFX7-NEXT:    s_mov_b32 s9, s10
4025; GFX7-NEXT:    s_mov_b32 s8, s11
4026; GFX7-NEXT:    s_add_u32 s4, s4, s9
4027; GFX7-NEXT:    s_addc_u32 s8, s5, s8
4028; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
4029; GFX7-NEXT:    s_mov_b32 s5, s8
4030; GFX7-NEXT:    v_mov_b32_e32 v2, s7
4031; GFX7-NEXT:    v_mov_b32_e32 v0, s6
4032; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4033; GFX7-NEXT:    v_mov_b32_e32 v3, v0
4034; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4035; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4036; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4037; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
4038; GFX7-NEXT:    s_waitcnt vmcnt(0)
4039; GFX7-NEXT:    buffer_wbinvl1_vol
4040; GFX7-NEXT:    s_endpgm
4041;
4042; GFX10-WGP-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4043; GFX10-WGP:       ; %bb.0: ; %entry
4044; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
4045; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4046; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
4047; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
4048; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4049; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
4050; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
4051; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4052; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
4053; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4054; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4055; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4056; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4057; GFX10-WGP-NEXT:    buffer_gl1_inv
4058; GFX10-WGP-NEXT:    buffer_gl0_inv
4059; GFX10-WGP-NEXT:    s_endpgm
4060;
4061; GFX10-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4062; GFX10-CU:       ; %bb.0: ; %entry
4063; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
4064; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4065; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
4066; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
4067; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
4068; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
4069; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
4070; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4071; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
4072; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4073; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4074; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4075; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4076; GFX10-CU-NEXT:    buffer_gl1_inv
4077; GFX10-CU-NEXT:    buffer_gl0_inv
4078; GFX10-CU-NEXT:    s_endpgm
4079;
4080; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4081; SKIP-CACHE-INV:       ; %bb.0: ; %entry
4082; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
4083; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
4084; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
4085; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
4086; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
4087; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
4088; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
4089; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
4090; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
4091; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
4092; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
4093; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
4094; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
4095; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
4096; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
4097; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4098; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
4099; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4100; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4101; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
4102; SKIP-CACHE-INV-NEXT:    s_endpgm
4103;
4104; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4105; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
4106; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4107; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4108; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4109; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4110; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4111; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4112; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4113; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4114; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4115; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4116; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4117; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4118; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
4119; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
4120;
4121; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4122; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
4123; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4124; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4125; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4126; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4127; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4128; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4129; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4130; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4131; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4132; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4133; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4134; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4135; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
4136; GFX90A-TGSPLIT-NEXT:    s_endpgm
4137;
4138; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4139; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
4140; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4141; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4142; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4143; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4144; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4145; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4146; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4147; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4148; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4149; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
4150; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4151; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4152; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4153; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
4154; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
4155;
4156; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4157; GFX940-TGSPLIT:       ; %bb.0: ; %entry
4158; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4159; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4160; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4161; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4162; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4163; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4164; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4165; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4166; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4167; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
4168; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4169; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4170; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4171; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
4172; GFX940-TGSPLIT-NEXT:    s_endpgm
4173;
4174; GFX11-WGP-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4175; GFX11-WGP:       ; %bb.0: ; %entry
4176; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
4177; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4178; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4179; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4180; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4181; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
4182; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
4183; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4184; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
4185; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4186; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4187; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4188; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4189; GFX11-WGP-NEXT:    buffer_gl1_inv
4190; GFX11-WGP-NEXT:    buffer_gl0_inv
4191; GFX11-WGP-NEXT:    s_endpgm
4192;
4193; GFX11-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4194; GFX11-CU:       ; %bb.0: ; %entry
4195; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
4196; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4197; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4198; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4199; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
4200; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
4201; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
4202; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4203; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
4204; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4205; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4206; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4207; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4208; GFX11-CU-NEXT:    buffer_gl1_inv
4209; GFX11-CU-NEXT:    buffer_gl0_inv
4210; GFX11-CU-NEXT:    s_endpgm
4211;
4212; GFX12-WGP-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4213; GFX12-WGP:       ; %bb.0: ; %entry
4214; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
4215; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4216; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4217; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4218; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
4219; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
4220; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
4221; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4222; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
4223; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
4224; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
4225; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4226; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
4227; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
4228; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4229; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
4230; GFX12-WGP-NEXT:    s_endpgm
4231;
4232; GFX12-CU-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
4233; GFX12-CU:       ; %bb.0: ; %entry
4234; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
4235; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4236; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4237; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4238; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
4239; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
4240; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
4241; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4242; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
4243; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
4244; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
4245; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4246; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
4247; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
4248; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4249; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
4250; GFX12-CU-NEXT:    s_endpgm
4251    ptr addrspace(1) %out, i32 %in, i32 %old) {
4252entry:
  ; Element index 4 of an i32 array is byte offset 16; the checks above show it
  ; either as "offset:16" on the atomic or, in the GFX7 checks, as the constant
  ; 16 added to the base address ahead of the flat atomic.
4253  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
4254  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
4255  ret void
4256}
4257
; Agent-scope cmpxchg on global memory (addrspace(1)) with monotonic success and
; acquire failure ordering. In the expected output the only wait ahead of the
; compare-swap is the one that follows the scalar loads; nothing extra is
; expected directly before the atomic. After the atomic every run expects a
; wait, and every run except the SKIP-CACHE-INV one also expects a cache
; invalidate before s_endpgm.
4258define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg(
4259; GFX6-LABEL: global_agent_monotonic_acquire_cmpxchg:
4260; GFX6:       ; %bb.0: ; %entry
4261; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
4262; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
4263; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
4264; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
4265; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4266; GFX6-NEXT:    s_mov_b32 s12, s5
4267; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
4268; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
4269; GFX6-NEXT:    s_mov_b32 s11, -1
4270; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
4271; GFX6-NEXT:    s_mov_b32 s5, s12
4272; GFX6-NEXT:    s_mov_b32 s6, s11
4273; GFX6-NEXT:    s_mov_b32 s7, s10
4274; GFX6-NEXT:    v_mov_b32_e32 v0, s9
4275; GFX6-NEXT:    v_mov_b32_e32 v2, s8
4276; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4277; GFX6-NEXT:    v_mov_b32_e32 v1, v2
4278; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4279; GFX6-NEXT:    s_waitcnt vmcnt(0)
4280; GFX6-NEXT:    buffer_wbinvl1
4281; GFX6-NEXT:    s_endpgm
4282;
4283; GFX7-LABEL: global_agent_monotonic_acquire_cmpxchg:
4284; GFX7:       ; %bb.0: ; %entry
4285; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
4286; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4287; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
4288; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
4289; GFX7-NEXT:    s_mov_b64 s[10:11], 16
4290; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4291; GFX7-NEXT:    s_mov_b32 s4, s8
4292; GFX7-NEXT:    s_mov_b32 s5, s9
4293; GFX7-NEXT:    s_mov_b32 s9, s10
4294; GFX7-NEXT:    s_mov_b32 s8, s11
4295; GFX7-NEXT:    s_add_u32 s4, s4, s9
4296; GFX7-NEXT:    s_addc_u32 s8, s5, s8
4297; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
4298; GFX7-NEXT:    s_mov_b32 s5, s8
4299; GFX7-NEXT:    v_mov_b32_e32 v2, s7
4300; GFX7-NEXT:    v_mov_b32_e32 v0, s6
4301; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4302; GFX7-NEXT:    v_mov_b32_e32 v3, v0
4303; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4304; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4305; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
4306; GFX7-NEXT:    s_waitcnt vmcnt(0)
4307; GFX7-NEXT:    buffer_wbinvl1_vol
4308; GFX7-NEXT:    s_endpgm
4309;
4310; GFX10-WGP-LABEL: global_agent_monotonic_acquire_cmpxchg:
4311; GFX10-WGP:       ; %bb.0: ; %entry
4312; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
4313; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4314; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
4315; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
4316; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4317; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
4318; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
4319; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4320; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
4321; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4322; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4323; GFX10-WGP-NEXT:    buffer_gl1_inv
4324; GFX10-WGP-NEXT:    buffer_gl0_inv
4325; GFX10-WGP-NEXT:    s_endpgm
4326;
4327; GFX10-CU-LABEL: global_agent_monotonic_acquire_cmpxchg:
4328; GFX10-CU:       ; %bb.0: ; %entry
4329; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
4330; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4331; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
4332; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
4333; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
4334; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
4335; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
4336; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4337; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
4338; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4339; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4340; GFX10-CU-NEXT:    buffer_gl1_inv
4341; GFX10-CU-NEXT:    buffer_gl0_inv
4342; GFX10-CU-NEXT:    s_endpgm
4343;
4344; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_cmpxchg:
4345; SKIP-CACHE-INV:       ; %bb.0: ; %entry
4346; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
4347; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
4348; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
4349; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
4350; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
4351; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
4352; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
4353; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
4354; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
4355; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
4356; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
4357; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
4358; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
4359; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
4360; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
4361; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4362; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
4363; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4364; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
4365; SKIP-CACHE-INV-NEXT:    s_endpgm
4366;
4367; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg:
4368; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
4369; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4370; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4371; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4372; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4373; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4374; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4375; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4376; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4377; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4378; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4379; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4380; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
4381; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
4382;
4383; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg:
4384; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
4385; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4386; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4387; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4388; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4389; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4390; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4391; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4392; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4393; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4394; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4395; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4396; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
4397; GFX90A-TGSPLIT-NEXT:    s_endpgm
4398;
4399; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg:
4400; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
4401; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4402; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4403; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4404; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4405; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4406; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4407; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4408; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4409; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4410; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4411; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4412; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
4413; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
4414;
4415; GFX940-TGSPLIT-LABEL: global_agent_monotonic_acquire_cmpxchg:
4416; GFX940-TGSPLIT:       ; %bb.0: ; %entry
4417; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4418; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4419; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4420; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4421; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4422; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4423; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4424; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4425; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4426; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4427; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4428; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
4429; GFX940-TGSPLIT-NEXT:    s_endpgm
4430;
4431; GFX11-WGP-LABEL: global_agent_monotonic_acquire_cmpxchg:
4432; GFX11-WGP:       ; %bb.0: ; %entry
4433; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
4434; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4435; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4436; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4437; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4438; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
4439; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
4440; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4441; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
4442; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4443; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4444; GFX11-WGP-NEXT:    buffer_gl1_inv
4445; GFX11-WGP-NEXT:    buffer_gl0_inv
4446; GFX11-WGP-NEXT:    s_endpgm
4447;
4448; GFX11-CU-LABEL: global_agent_monotonic_acquire_cmpxchg:
4449; GFX11-CU:       ; %bb.0: ; %entry
4450; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
4451; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4452; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4453; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4454; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
4455; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
4456; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
4457; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4458; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
4459; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4460; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4461; GFX11-CU-NEXT:    buffer_gl1_inv
4462; GFX11-CU-NEXT:    buffer_gl0_inv
4463; GFX11-CU-NEXT:    s_endpgm
4464;
4465; GFX12-WGP-LABEL: global_agent_monotonic_acquire_cmpxchg:
4466; GFX12-WGP:       ; %bb.0: ; %entry
4467; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
4468; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4469; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4470; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4471; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
4472; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
4473; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
4474; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4475; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
4476; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
4477; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4478; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
4479; GFX12-WGP-NEXT:    s_endpgm
4480;
4481; GFX12-CU-LABEL: global_agent_monotonic_acquire_cmpxchg:
4482; GFX12-CU:       ; %bb.0: ; %entry
4483; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
4484; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4485; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4486; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4487; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
4488; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
4489; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
4490; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4491; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
4492; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
4493; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4494; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
4495; GFX12-CU-NEXT:    s_endpgm
4496    ptr addrspace(1) %out, i32 %in, i32 %old) {
4497entry:
  ; Element index 4 of an i32 array is byte offset 16; the checks above show it
  ; either as "offset:16" on the atomic or, in the GFX7 checks, as the constant
  ; 16 added to the base address ahead of the flat atomic.
4498  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
4499  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire
4500  ret void
4501}
4502
4503define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
4504; GFX6-LABEL: global_agent_acquire_acquire_cmpxchg:
4505; GFX6:       ; %bb.0: ; %entry
4506; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
4507; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
4508; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
4509; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
4510; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4511; GFX6-NEXT:    s_mov_b32 s12, s5
4512; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
4513; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
4514; GFX6-NEXT:    s_mov_b32 s11, -1
4515; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
4516; GFX6-NEXT:    s_mov_b32 s5, s12
4517; GFX6-NEXT:    s_mov_b32 s6, s11
4518; GFX6-NEXT:    s_mov_b32 s7, s10
4519; GFX6-NEXT:    v_mov_b32_e32 v0, s9
4520; GFX6-NEXT:    v_mov_b32_e32 v2, s8
4521; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4522; GFX6-NEXT:    v_mov_b32_e32 v1, v2
4523; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4524; GFX6-NEXT:    s_waitcnt vmcnt(0)
4525; GFX6-NEXT:    buffer_wbinvl1
4526; GFX6-NEXT:    s_endpgm
4527;
4528; GFX7-LABEL: global_agent_acquire_acquire_cmpxchg:
4529; GFX7:       ; %bb.0: ; %entry
4530; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
4531; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4532; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
4533; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
4534; GFX7-NEXT:    s_mov_b64 s[10:11], 16
4535; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4536; GFX7-NEXT:    s_mov_b32 s4, s8
4537; GFX7-NEXT:    s_mov_b32 s5, s9
4538; GFX7-NEXT:    s_mov_b32 s9, s10
4539; GFX7-NEXT:    s_mov_b32 s8, s11
4540; GFX7-NEXT:    s_add_u32 s4, s4, s9
4541; GFX7-NEXT:    s_addc_u32 s8, s5, s8
4542; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
4543; GFX7-NEXT:    s_mov_b32 s5, s8
4544; GFX7-NEXT:    v_mov_b32_e32 v2, s7
4545; GFX7-NEXT:    v_mov_b32_e32 v0, s6
4546; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4547; GFX7-NEXT:    v_mov_b32_e32 v3, v0
4548; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4549; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4550; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
4551; GFX7-NEXT:    s_waitcnt vmcnt(0)
4552; GFX7-NEXT:    buffer_wbinvl1_vol
4553; GFX7-NEXT:    s_endpgm
4554;
4555; GFX10-WGP-LABEL: global_agent_acquire_acquire_cmpxchg:
4556; GFX10-WGP:       ; %bb.0: ; %entry
4557; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
4558; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4559; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
4560; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
4561; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4562; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
4563; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
4564; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4565; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
4566; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4567; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4568; GFX10-WGP-NEXT:    buffer_gl1_inv
4569; GFX10-WGP-NEXT:    buffer_gl0_inv
4570; GFX10-WGP-NEXT:    s_endpgm
4571;
4572; GFX10-CU-LABEL: global_agent_acquire_acquire_cmpxchg:
4573; GFX10-CU:       ; %bb.0: ; %entry
4574; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
4575; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4576; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
4577; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
4578; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
4579; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
4580; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
4581; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4582; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
4583; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4584; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4585; GFX10-CU-NEXT:    buffer_gl1_inv
4586; GFX10-CU-NEXT:    buffer_gl0_inv
4587; GFX10-CU-NEXT:    s_endpgm
4588;
4589; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_cmpxchg:
4590; SKIP-CACHE-INV:       ; %bb.0: ; %entry
4591; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
4592; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
4593; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
4594; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
4595; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
4596; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
4597; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
4598; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
4599; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
4600; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
4601; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
4602; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
4603; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
4604; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
4605; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
4606; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4607; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
4608; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4609; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
4610; SKIP-CACHE-INV-NEXT:    s_endpgm
4611;
4612; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg:
4613; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
4614; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4615; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4616; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4617; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4618; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4619; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4620; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4621; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4622; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4623; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4624; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4625; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
4626; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
4627;
4628; GFX90A-TGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg:
4629; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
4630; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4631; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4632; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4633; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4634; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4635; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4636; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4637; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4638; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4639; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4640; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4641; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
4642; GFX90A-TGSPLIT-NEXT:    s_endpgm
4643;
4644; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg:
4645; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
4646; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4647; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4648; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4649; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4650; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4651; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4652; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4653; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4654; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4655; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4656; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4657; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
4658; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
4659;
4660; GFX940-TGSPLIT-LABEL: global_agent_acquire_acquire_cmpxchg:
4661; GFX940-TGSPLIT:       ; %bb.0: ; %entry
4662; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4663; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4664; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4665; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4666; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4667; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4668; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4669; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4670; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4671; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4672; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4673; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
4674; GFX940-TGSPLIT-NEXT:    s_endpgm
4675;
4676; GFX11-WGP-LABEL: global_agent_acquire_acquire_cmpxchg:
4677; GFX11-WGP:       ; %bb.0: ; %entry
4678; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
4679; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4680; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4681; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4682; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4683; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
4684; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
4685; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4686; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
4687; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4688; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4689; GFX11-WGP-NEXT:    buffer_gl1_inv
4690; GFX11-WGP-NEXT:    buffer_gl0_inv
4691; GFX11-WGP-NEXT:    s_endpgm
4692;
4693; GFX11-CU-LABEL: global_agent_acquire_acquire_cmpxchg:
4694; GFX11-CU:       ; %bb.0: ; %entry
4695; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
4696; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4697; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4698; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4699; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
4700; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
4701; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
4702; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4703; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
4704; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4705; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4706; GFX11-CU-NEXT:    buffer_gl1_inv
4707; GFX11-CU-NEXT:    buffer_gl0_inv
4708; GFX11-CU-NEXT:    s_endpgm
4709;
4710; GFX12-WGP-LABEL: global_agent_acquire_acquire_cmpxchg:
4711; GFX12-WGP:       ; %bb.0: ; %entry
4712; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
4713; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4714; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4715; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4716; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
4717; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
4718; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
4719; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4720; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
4721; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
4722; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4723; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
4724; GFX12-WGP-NEXT:    s_endpgm
4725;
4726; GFX12-CU-LABEL: global_agent_acquire_acquire_cmpxchg:
4727; GFX12-CU:       ; %bb.0: ; %entry
4728; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
4729; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4730; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4731; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4732; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
4733; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
4734; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
4735; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4736; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
4737; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
4738; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4739; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
4740; GFX12-CU-NEXT:    s_endpgm
4741    ptr addrspace(1) %out, i32 %in, i32 %old) {
4742entry:
4743  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
4744  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
4745  ret void
4746}
4747
; Kernel under test: a volatile cmpxchg on global memory (addrspace(1)) at
; syncscope("agent") with success ordering `release` and failure ordering
; `acquire`. The value returned by the cmpxchg is not used.
;
; The assertions inside this definition were produced by
; utils/update_llc_test_checks.py (see the note on the first line of the file);
; regenerate them with that script instead of editing them by hand.
;
; What the expected output shows for this ordering pair:
;  - every configuration has at least one wait instruction between the operand
;    setup and the atomic (the gfx940 configurations additionally expect
;    `buffer_wbl2 sc1` just before that wait);
;  - every configuration waits again after the atomic, and all but the
;    configuration built with -amdgcn-skip-cache-invalidations then expect
;    buffer_wbinvl1, buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv,
;    `buffer_inv sc1` or `global_inv scope:SCOPE_DEV` before s_endpgm.
4748define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
4749; GFX6-LABEL: global_agent_release_acquire_cmpxchg:
4750; GFX6:       ; %bb.0: ; %entry
4751; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
4752; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
4753; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
4754; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
4755; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4756; GFX6-NEXT:    s_mov_b32 s12, s5
4757; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
4758; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
4759; GFX6-NEXT:    s_mov_b32 s11, -1
4760; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
4761; GFX6-NEXT:    s_mov_b32 s5, s12
4762; GFX6-NEXT:    s_mov_b32 s6, s11
4763; GFX6-NEXT:    s_mov_b32 s7, s10
4764; GFX6-NEXT:    v_mov_b32_e32 v0, s9
4765; GFX6-NEXT:    v_mov_b32_e32 v2, s8
4766; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4767; GFX6-NEXT:    v_mov_b32_e32 v1, v2
4768; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4769; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4770; GFX6-NEXT:    s_waitcnt vmcnt(0)
4771; GFX6-NEXT:    buffer_wbinvl1
4772; GFX6-NEXT:    s_endpgm
4773;
4774; GFX7-LABEL: global_agent_release_acquire_cmpxchg:
4775; GFX7:       ; %bb.0: ; %entry
4776; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
4777; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4778; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
4779; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
4780; GFX7-NEXT:    s_mov_b64 s[10:11], 16
4781; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4782; GFX7-NEXT:    s_mov_b32 s4, s8
4783; GFX7-NEXT:    s_mov_b32 s5, s9
4784; GFX7-NEXT:    s_mov_b32 s9, s10
4785; GFX7-NEXT:    s_mov_b32 s8, s11
4786; GFX7-NEXT:    s_add_u32 s4, s4, s9
4787; GFX7-NEXT:    s_addc_u32 s8, s5, s8
4788; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
4789; GFX7-NEXT:    s_mov_b32 s5, s8
4790; GFX7-NEXT:    v_mov_b32_e32 v2, s7
4791; GFX7-NEXT:    v_mov_b32_e32 v0, s6
4792; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4793; GFX7-NEXT:    v_mov_b32_e32 v3, v0
4794; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4795; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4796; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4797; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
4798; GFX7-NEXT:    s_waitcnt vmcnt(0)
4799; GFX7-NEXT:    buffer_wbinvl1_vol
4800; GFX7-NEXT:    s_endpgm
4801;
4802; GFX10-WGP-LABEL: global_agent_release_acquire_cmpxchg:
4803; GFX10-WGP:       ; %bb.0: ; %entry
4804; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
4805; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4806; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
4807; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
4808; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4809; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
4810; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
4811; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4812; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
4813; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4814; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4815; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4816; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4817; GFX10-WGP-NEXT:    buffer_gl1_inv
4818; GFX10-WGP-NEXT:    buffer_gl0_inv
4819; GFX10-WGP-NEXT:    s_endpgm
4820;
4821; GFX10-CU-LABEL: global_agent_release_acquire_cmpxchg:
4822; GFX10-CU:       ; %bb.0: ; %entry
4823; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
4824; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4825; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
4826; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
4827; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
4828; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
4829; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
4830; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4831; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
4832; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4833; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4834; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4835; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4836; GFX10-CU-NEXT:    buffer_gl1_inv
4837; GFX10-CU-NEXT:    buffer_gl0_inv
4838; GFX10-CU-NEXT:    s_endpgm
4839;
4840; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_cmpxchg:
4841; SKIP-CACHE-INV:       ; %bb.0: ; %entry
4842; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
4843; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
4844; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
4845; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
4846; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
4847; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
4848; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
4849; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
4850; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
4851; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
4852; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
4853; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
4854; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
4855; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
4856; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
4857; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4858; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
4859; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4860; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4861; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
4862; SKIP-CACHE-INV-NEXT:    s_endpgm
4863;
4864; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_acquire_cmpxchg:
4865; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
4866; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4867; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4868; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4869; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4870; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4871; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4872; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4873; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4874; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4875; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4876; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4877; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4878; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
4879; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
4880;
4881; GFX90A-TGSPLIT-LABEL: global_agent_release_acquire_cmpxchg:
4882; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
4883; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4884; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4885; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4886; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4887; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4888; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4889; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4890; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4891; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4892; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4893; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4894; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4895; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
4896; GFX90A-TGSPLIT-NEXT:    s_endpgm
4897;
4898; GFX940-NOTTGSPLIT-LABEL: global_agent_release_acquire_cmpxchg:
4899; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
4900; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4901; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4902; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4903; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4904; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4905; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4906; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4907; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4908; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4909; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
4910; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4911; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4912; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4913; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
4914; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
4915;
4916; GFX940-TGSPLIT-LABEL: global_agent_release_acquire_cmpxchg:
4917; GFX940-TGSPLIT:       ; %bb.0: ; %entry
4918; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4919; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4920; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4921; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4922; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4923; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4924; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4925; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4926; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4927; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
4928; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4929; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
4930; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4931; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
4932; GFX940-TGSPLIT-NEXT:    s_endpgm
4933;
4934; GFX11-WGP-LABEL: global_agent_release_acquire_cmpxchg:
4935; GFX11-WGP:       ; %bb.0: ; %entry
4936; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
4937; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4938; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4939; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4940; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4941; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
4942; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
4943; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4944; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
4945; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4946; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4947; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4948; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4949; GFX11-WGP-NEXT:    buffer_gl1_inv
4950; GFX11-WGP-NEXT:    buffer_gl0_inv
4951; GFX11-WGP-NEXT:    s_endpgm
4952;
4953; GFX11-CU-LABEL: global_agent_release_acquire_cmpxchg:
4954; GFX11-CU:       ; %bb.0: ; %entry
4955; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
4956; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4957; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4958; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4959; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
4960; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
4961; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
4962; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4963; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
4964; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4965; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4966; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4967; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4968; GFX11-CU-NEXT:    buffer_gl1_inv
4969; GFX11-CU-NEXT:    buffer_gl0_inv
4970; GFX11-CU-NEXT:    s_endpgm
4971;
4972; GFX12-WGP-LABEL: global_agent_release_acquire_cmpxchg:
4973; GFX12-WGP:       ; %bb.0: ; %entry
4974; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
4975; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4976; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4977; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4978; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
4979; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
4980; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
4981; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4982; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
4983; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
4984; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
4985; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4986; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
4987; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
4988; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4989; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
4990; GFX12-WGP-NEXT:    s_endpgm
4991;
4992; GFX12-CU-LABEL: global_agent_release_acquire_cmpxchg:
4993; GFX12-CU:       ; %bb.0: ; %entry
4994; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
4995; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4996; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4997; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4998; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
4999; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
5000; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
5001; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5002; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
5003; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
5004; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
5005; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5006; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
5007; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
5008; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5009; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
5010; GFX12-CU-NEXT:    s_endpgm
5011    ptr addrspace(1) %out, i32 %in, i32 %old) {
5012entry:
  ; Element 4 of an i32 array starting at %out, i.e. byte offset 16. This is the
  ; `offset:16` on the expected atomics (or the constant 16 added to the pointer
  ; where flat_atomic_cmpswap is expected).
5013  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Operation under test: compare against %old, store %in on a match. %val is
  ; left unused.
5014  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release acquire
5015  ret void
5016}
5017
; Kernel under test: a volatile cmpxchg on global memory (addrspace(1)) at
; syncscope("agent") with success ordering `acq_rel` and failure ordering
; `acquire`. The value returned by the cmpxchg is not used.
;
; The assertions inside this definition were produced by
; utils/update_llc_test_checks.py (see the note on the first line of the file);
; regenerate them with that script instead of editing them by hand.
;
; Apart from the kernel name in the label lines, the expected instruction
; sequences here are the same as those checked for
; @global_agent_release_acquire_cmpxchg: at least one wait instruction before
; the atomic (preceded by `buffer_wbl2 sc1` in the gfx940 configurations), a
; wait after it, and then - except in the configuration built with
; -amdgcn-skip-cache-invalidations - buffer_wbinvl1, buffer_wbinvl1_vol,
; buffer_gl1_inv + buffer_gl0_inv, `buffer_inv sc1` or
; `global_inv scope:SCOPE_DEV` before s_endpgm.
5018define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
5019; GFX6-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5020; GFX6:       ; %bb.0: ; %entry
5021; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
5022; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5023; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
5024; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
5025; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5026; GFX6-NEXT:    s_mov_b32 s12, s5
5027; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
5028; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
5029; GFX6-NEXT:    s_mov_b32 s11, -1
5030; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
5031; GFX6-NEXT:    s_mov_b32 s5, s12
5032; GFX6-NEXT:    s_mov_b32 s6, s11
5033; GFX6-NEXT:    s_mov_b32 s7, s10
5034; GFX6-NEXT:    v_mov_b32_e32 v0, s9
5035; GFX6-NEXT:    v_mov_b32_e32 v2, s8
5036; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5037; GFX6-NEXT:    v_mov_b32_e32 v1, v2
5038; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5039; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5040; GFX6-NEXT:    s_waitcnt vmcnt(0)
5041; GFX6-NEXT:    buffer_wbinvl1
5042; GFX6-NEXT:    s_endpgm
5043;
5044; GFX7-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5045; GFX7:       ; %bb.0: ; %entry
5046; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
5047; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
5048; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
5049; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
5050; GFX7-NEXT:    s_mov_b64 s[10:11], 16
5051; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5052; GFX7-NEXT:    s_mov_b32 s4, s8
5053; GFX7-NEXT:    s_mov_b32 s5, s9
5054; GFX7-NEXT:    s_mov_b32 s9, s10
5055; GFX7-NEXT:    s_mov_b32 s8, s11
5056; GFX7-NEXT:    s_add_u32 s4, s4, s9
5057; GFX7-NEXT:    s_addc_u32 s8, s5, s8
5058; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
5059; GFX7-NEXT:    s_mov_b32 s5, s8
5060; GFX7-NEXT:    v_mov_b32_e32 v2, s7
5061; GFX7-NEXT:    v_mov_b32_e32 v0, s6
5062; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5063; GFX7-NEXT:    v_mov_b32_e32 v3, v0
5064; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5065; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5066; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5067; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
5068; GFX7-NEXT:    s_waitcnt vmcnt(0)
5069; GFX7-NEXT:    buffer_wbinvl1_vol
5070; GFX7-NEXT:    s_endpgm
5071;
5072; GFX10-WGP-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5073; GFX10-WGP:       ; %bb.0: ; %entry
5074; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
5075; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5076; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
5077; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
5078; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5079; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
5080; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
5081; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5082; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
5083; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5084; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5085; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5086; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5087; GFX10-WGP-NEXT:    buffer_gl1_inv
5088; GFX10-WGP-NEXT:    buffer_gl0_inv
5089; GFX10-WGP-NEXT:    s_endpgm
5090;
5091; GFX10-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5092; GFX10-CU:       ; %bb.0: ; %entry
5093; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
5094; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5095; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
5096; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
5097; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
5098; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
5099; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
5100; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5101; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
5102; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5103; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5104; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5105; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5106; GFX10-CU-NEXT:    buffer_gl1_inv
5107; GFX10-CU-NEXT:    buffer_gl0_inv
5108; GFX10-CU-NEXT:    s_endpgm
5109;
5110; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5111; SKIP-CACHE-INV:       ; %bb.0: ; %entry
5112; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
5113; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
5114; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
5115; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
5116; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
5117; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
5118; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
5119; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
5120; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
5121; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
5122; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
5123; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
5124; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
5125; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
5126; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
5127; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5128; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
5129; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5130; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5131; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
5132; SKIP-CACHE-INV-NEXT:    s_endpgm
5133;
5134; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5135; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
5136; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5137; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5138; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5139; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5140; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5141; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5142; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5143; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5144; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5145; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5146; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5147; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5148; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
5149; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
5150;
5151; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5152; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
5153; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5154; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5155; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5156; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5157; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5158; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5159; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5160; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5161; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5162; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5163; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5164; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5165; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
5166; GFX90A-TGSPLIT-NEXT:    s_endpgm
5167;
5168; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5169; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
5170; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5171; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5172; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5173; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5174; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5175; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5176; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5177; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5178; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5179; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
5180; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5181; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5182; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5183; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
5184; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
5185;
5186; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5187; GFX940-TGSPLIT:       ; %bb.0: ; %entry
5188; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5189; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5190; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5191; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5192; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5193; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5194; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5195; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5196; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5197; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
5198; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5199; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5200; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5201; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
5202; GFX940-TGSPLIT-NEXT:    s_endpgm
5203;
5204; GFX11-WGP-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5205; GFX11-WGP:       ; %bb.0: ; %entry
5206; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
5207; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5208; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5209; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5210; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5211; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
5212; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
5213; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5214; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
5215; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5216; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5217; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5218; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5219; GFX11-WGP-NEXT:    buffer_gl1_inv
5220; GFX11-WGP-NEXT:    buffer_gl0_inv
5221; GFX11-WGP-NEXT:    s_endpgm
5222;
5223; GFX11-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5224; GFX11-CU:       ; %bb.0: ; %entry
5225; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
5226; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5227; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5228; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5229; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
5230; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
5231; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
5232; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5233; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
5234; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5235; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5236; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5237; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5238; GFX11-CU-NEXT:    buffer_gl1_inv
5239; GFX11-CU-NEXT:    buffer_gl0_inv
5240; GFX11-CU-NEXT:    s_endpgm
5241;
5242; GFX12-WGP-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5243; GFX12-WGP:       ; %bb.0: ; %entry
5244; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
5245; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5246; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5247; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5248; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
5249; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
5250; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
5251; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5252; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
5253; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
5254; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
5255; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5256; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
5257; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
5258; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5259; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
5260; GFX12-WGP-NEXT:    s_endpgm
5261;
5262; GFX12-CU-LABEL: global_agent_acq_rel_acquire_cmpxchg:
5263; GFX12-CU:       ; %bb.0: ; %entry
5264; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
5265; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5266; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5267; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5268; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
5269; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
5270; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
5271; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5272; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
5273; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
5274; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
5275; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5276; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
5277; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
5278; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5279; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
5280; GFX12-CU-NEXT:    s_endpgm
5281    ptr addrspace(1) %out, i32 %in, i32 %old) {
5282entry:
  ; Element 4 of an i32 array starting at %out, i.e. byte offset 16. This is the
  ; `offset:16` on the expected atomics (or the constant 16 added to the pointer
  ; where flat_atomic_cmpswap is expected).
5283  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Operation under test: compare against %old, store %in on a match. %val is
  ; left unused.
5284  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
5285  ret void
5286}
5287
; Test: volatile cmpxchg on a global (addrspace(1)) pointer at
; syncscope("agent") with success ordering seq_cst and failure ordering
; acquire. The address is %out advanced by 4 x i32, which appears as the
; constant 16 in the expected output; the cmpxchg result (%val) is unused.
; In the expected output every run waits before and after the atomic and then
; issues a cache invalidate, except the -amdgcn-skip-cache-invalidations run,
; which ends right after the trailing wait.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
5288define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
5289; GFX6-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5290; GFX6:       ; %bb.0: ; %entry
5291; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
5292; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5293; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
5294; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
5295; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5296; GFX6-NEXT:    s_mov_b32 s12, s5
5297; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
5298; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
5299; GFX6-NEXT:    s_mov_b32 s11, -1
5300; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
5301; GFX6-NEXT:    s_mov_b32 s5, s12
5302; GFX6-NEXT:    s_mov_b32 s6, s11
5303; GFX6-NEXT:    s_mov_b32 s7, s10
5304; GFX6-NEXT:    v_mov_b32_e32 v0, s9
5305; GFX6-NEXT:    v_mov_b32_e32 v2, s8
5306; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5307; GFX6-NEXT:    v_mov_b32_e32 v1, v2
5308; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5309; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5310; GFX6-NEXT:    s_waitcnt vmcnt(0)
5311; GFX6-NEXT:    buffer_wbinvl1
5312; GFX6-NEXT:    s_endpgm
5313;
5314; GFX7-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5315; GFX7:       ; %bb.0: ; %entry
5316; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
5317; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
5318; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
5319; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
5320; GFX7-NEXT:    s_mov_b64 s[10:11], 16
5321; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5322; GFX7-NEXT:    s_mov_b32 s4, s8
5323; GFX7-NEXT:    s_mov_b32 s5, s9
5324; GFX7-NEXT:    s_mov_b32 s9, s10
5325; GFX7-NEXT:    s_mov_b32 s8, s11
5326; GFX7-NEXT:    s_add_u32 s4, s4, s9
5327; GFX7-NEXT:    s_addc_u32 s8, s5, s8
5328; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
5329; GFX7-NEXT:    s_mov_b32 s5, s8
5330; GFX7-NEXT:    v_mov_b32_e32 v2, s7
5331; GFX7-NEXT:    v_mov_b32_e32 v0, s6
5332; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5333; GFX7-NEXT:    v_mov_b32_e32 v3, v0
5334; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5335; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5336; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5337; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
5338; GFX7-NEXT:    s_waitcnt vmcnt(0)
5339; GFX7-NEXT:    buffer_wbinvl1_vol
5340; GFX7-NEXT:    s_endpgm
5341;
5342; GFX10-WGP-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5343; GFX10-WGP:       ; %bb.0: ; %entry
5344; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
5345; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5346; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
5347; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
5348; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5349; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
5350; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
5351; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5352; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
5353; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5354; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5355; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5356; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5357; GFX10-WGP-NEXT:    buffer_gl1_inv
5358; GFX10-WGP-NEXT:    buffer_gl0_inv
5359; GFX10-WGP-NEXT:    s_endpgm
5360;
5361; GFX10-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5362; GFX10-CU:       ; %bb.0: ; %entry
5363; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
5364; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5365; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
5366; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
5367; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
5368; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
5369; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
5370; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5371; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
5372; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5373; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5374; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5375; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5376; GFX10-CU-NEXT:    buffer_gl1_inv
5377; GFX10-CU-NEXT:    buffer_gl0_inv
5378; GFX10-CU-NEXT:    s_endpgm
5379;
5380; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5381; SKIP-CACHE-INV:       ; %bb.0: ; %entry
5382; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
5383; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
5384; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
5385; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
5386; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
5387; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
5388; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
5389; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
5390; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
5391; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
5392; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
5393; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
5394; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
5395; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
5396; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
5397; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5398; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
5399; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5400; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5401; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
5402; SKIP-CACHE-INV-NEXT:    s_endpgm
5403;
5404; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5405; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
5406; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5407; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5408; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5409; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5410; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5411; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5412; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5413; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5414; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5415; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5416; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5417; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5418; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
5419; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
5420;
5421; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5422; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
5423; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5424; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5425; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5426; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5427; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5428; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5429; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5430; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5431; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5432; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5433; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5434; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5435; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
5436; GFX90A-TGSPLIT-NEXT:    s_endpgm
5437;
5438; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5439; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
5440; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5441; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5442; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5443; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5444; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5445; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5446; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5447; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5448; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5449; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
5450; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5451; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5452; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5453; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
5454; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
5455;
5456; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5457; GFX940-TGSPLIT:       ; %bb.0: ; %entry
5458; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5459; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5460; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5461; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5462; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5463; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5464; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5465; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5466; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5467; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
5468; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5469; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5470; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5471; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
5472; GFX940-TGSPLIT-NEXT:    s_endpgm
5473;
5474; GFX11-WGP-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5475; GFX11-WGP:       ; %bb.0: ; %entry
5476; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
5477; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5478; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5479; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5480; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5481; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
5482; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
5483; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5484; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
5485; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5486; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5487; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5488; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5489; GFX11-WGP-NEXT:    buffer_gl1_inv
5490; GFX11-WGP-NEXT:    buffer_gl0_inv
5491; GFX11-WGP-NEXT:    s_endpgm
5492;
5493; GFX11-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5494; GFX11-CU:       ; %bb.0: ; %entry
5495; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
5496; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5497; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5498; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5499; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
5500; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
5501; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
5502; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5503; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
5504; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5505; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5506; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5507; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5508; GFX11-CU-NEXT:    buffer_gl1_inv
5509; GFX11-CU-NEXT:    buffer_gl0_inv
5510; GFX11-CU-NEXT:    s_endpgm
5511;
5512; GFX12-WGP-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5513; GFX12-WGP:       ; %bb.0: ; %entry
5514; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
5515; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5516; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5517; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5518; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
5519; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
5520; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
5521; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5522; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
5523; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
5524; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
5525; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5526; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
5527; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
5528; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5529; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
5530; GFX12-WGP-NEXT:    s_endpgm
5531;
5532; GFX12-CU-LABEL: global_agent_seq_cst_acquire_cmpxchg:
5533; GFX12-CU:       ; %bb.0: ; %entry
5534; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
5535; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5536; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5537; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5538; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
5539; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
5540; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
5541; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5542; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
5543; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
5544; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
5545; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5546; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
5547; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
5548; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5549; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
5550; GFX12-CU-NEXT:    s_endpgm
5551    ptr addrspace(1) %out, i32 %in, i32 %old) {
5552entry:
5553  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
5554  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
5555  ret void
5556}
5557
; Test: volatile cmpxchg on a global (addrspace(1)) pointer at
; syncscope("agent") with success ordering monotonic and failure ordering
; seq_cst. The address is %out advanced by 4 x i32, which appears as the
; constant 16 in the expected output; the cmpxchg result (%val) is unused.
; Although the success ordering is only monotonic, the expected output still
; waits before and after the atomic and then issues a cache invalidate (the
; -amdgcn-skip-cache-invalidations run omits the invalidate).
; NOTE(review): presumably this is driven by the stronger seq_cst failure
; ordering - confirm against the memory legalizer.
; The assertions below are autogenerated (see the NOTE on the first line of
; this file); regenerate them with the script instead of editing by hand.
5558define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg(
5559; GFX6-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5560; GFX6:       ; %bb.0: ; %entry
5561; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
5562; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5563; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
5564; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
5565; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5566; GFX6-NEXT:    s_mov_b32 s12, s5
5567; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
5568; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
5569; GFX6-NEXT:    s_mov_b32 s11, -1
5570; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
5571; GFX6-NEXT:    s_mov_b32 s5, s12
5572; GFX6-NEXT:    s_mov_b32 s6, s11
5573; GFX6-NEXT:    s_mov_b32 s7, s10
5574; GFX6-NEXT:    v_mov_b32_e32 v0, s9
5575; GFX6-NEXT:    v_mov_b32_e32 v2, s8
5576; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5577; GFX6-NEXT:    v_mov_b32_e32 v1, v2
5578; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5579; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5580; GFX6-NEXT:    s_waitcnt vmcnt(0)
5581; GFX6-NEXT:    buffer_wbinvl1
5582; GFX6-NEXT:    s_endpgm
5583;
5584; GFX7-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5585; GFX7:       ; %bb.0: ; %entry
5586; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
5587; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
5588; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
5589; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
5590; GFX7-NEXT:    s_mov_b64 s[10:11], 16
5591; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5592; GFX7-NEXT:    s_mov_b32 s4, s8
5593; GFX7-NEXT:    s_mov_b32 s5, s9
5594; GFX7-NEXT:    s_mov_b32 s9, s10
5595; GFX7-NEXT:    s_mov_b32 s8, s11
5596; GFX7-NEXT:    s_add_u32 s4, s4, s9
5597; GFX7-NEXT:    s_addc_u32 s8, s5, s8
5598; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
5599; GFX7-NEXT:    s_mov_b32 s5, s8
5600; GFX7-NEXT:    v_mov_b32_e32 v2, s7
5601; GFX7-NEXT:    v_mov_b32_e32 v0, s6
5602; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5603; GFX7-NEXT:    v_mov_b32_e32 v3, v0
5604; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5605; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5606; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5607; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
5608; GFX7-NEXT:    s_waitcnt vmcnt(0)
5609; GFX7-NEXT:    buffer_wbinvl1_vol
5610; GFX7-NEXT:    s_endpgm
5611;
5612; GFX10-WGP-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5613; GFX10-WGP:       ; %bb.0: ; %entry
5614; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
5615; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5616; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
5617; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
5618; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5619; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
5620; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
5621; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5622; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
5623; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5624; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5625; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5626; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5627; GFX10-WGP-NEXT:    buffer_gl1_inv
5628; GFX10-WGP-NEXT:    buffer_gl0_inv
5629; GFX10-WGP-NEXT:    s_endpgm
5630;
5631; GFX10-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5632; GFX10-CU:       ; %bb.0: ; %entry
5633; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
5634; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5635; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
5636; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
5637; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
5638; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
5639; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
5640; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5641; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
5642; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5643; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5644; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5645; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5646; GFX10-CU-NEXT:    buffer_gl1_inv
5647; GFX10-CU-NEXT:    buffer_gl0_inv
5648; GFX10-CU-NEXT:    s_endpgm
5649;
5650; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5651; SKIP-CACHE-INV:       ; %bb.0: ; %entry
5652; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
5653; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
5654; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
5655; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
5656; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
5657; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
5658; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
5659; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
5660; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
5661; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
5662; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
5663; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
5664; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
5665; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
5666; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
5667; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5668; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
5669; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5670; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5671; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
5672; SKIP-CACHE-INV-NEXT:    s_endpgm
5673;
5674; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5675; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
5676; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5677; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5678; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5679; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5680; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5681; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5682; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5683; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5684; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5685; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5686; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5687; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5688; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
5689; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
5690;
5691; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5692; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
5693; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5694; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5695; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5696; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5697; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5698; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5699; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5700; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5701; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5702; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5703; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5704; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5705; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
5706; GFX90A-TGSPLIT-NEXT:    s_endpgm
5707;
5708; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5709; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
5710; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5711; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5712; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5713; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5714; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5715; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5716; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5717; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5718; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5719; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
5720; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5721; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5722; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5723; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
5724; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
5725;
5726; GFX940-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5727; GFX940-TGSPLIT:       ; %bb.0: ; %entry
5728; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5729; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5730; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5731; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5732; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5733; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5734; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5735; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5736; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5737; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
5738; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5739; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5740; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5741; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
5742; GFX940-TGSPLIT-NEXT:    s_endpgm
5743;
5744; GFX11-WGP-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5745; GFX11-WGP:       ; %bb.0: ; %entry
5746; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
5747; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5748; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5749; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5750; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5751; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
5752; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
5753; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5754; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
5755; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5756; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5757; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5758; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5759; GFX11-WGP-NEXT:    buffer_gl1_inv
5760; GFX11-WGP-NEXT:    buffer_gl0_inv
5761; GFX11-WGP-NEXT:    s_endpgm
5762;
5763; GFX11-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5764; GFX11-CU:       ; %bb.0: ; %entry
5765; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
5766; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5767; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5768; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5769; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
5770; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
5771; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
5772; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5773; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
5774; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5775; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5776; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5777; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5778; GFX11-CU-NEXT:    buffer_gl1_inv
5779; GFX11-CU-NEXT:    buffer_gl0_inv
5780; GFX11-CU-NEXT:    s_endpgm
5781;
5782; GFX12-WGP-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5783; GFX12-WGP:       ; %bb.0: ; %entry
5784; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
5785; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5786; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5787; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5788; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
5789; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
5790; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
5791; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5792; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
5793; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
5794; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
5795; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5796; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
5797; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
5798; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5799; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
5800; GFX12-WGP-NEXT:    s_endpgm
5801;
5802; GFX12-CU-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
5803; GFX12-CU:       ; %bb.0: ; %entry
5804; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
5805; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5806; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5807; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5808; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
5809; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
5810; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
5811; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5812; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
5813; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
5814; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
5815; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5816; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
5817; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
5818; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5819; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
5820; GFX12-CU-NEXT:    s_endpgm
5821    ptr addrspace(1) %out, i32 %in, i32 %old) {
5822entry:
5823  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
5824  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst
5825  ret void
5826}
5827
5828define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg(
5829; GFX6-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5830; GFX6:       ; %bb.0: ; %entry
5831; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
5832; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5833; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
5834; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
5835; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5836; GFX6-NEXT:    s_mov_b32 s12, s5
5837; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
5838; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
5839; GFX6-NEXT:    s_mov_b32 s11, -1
5840; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
5841; GFX6-NEXT:    s_mov_b32 s5, s12
5842; GFX6-NEXT:    s_mov_b32 s6, s11
5843; GFX6-NEXT:    s_mov_b32 s7, s10
5844; GFX6-NEXT:    v_mov_b32_e32 v0, s9
5845; GFX6-NEXT:    v_mov_b32_e32 v2, s8
5846; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5847; GFX6-NEXT:    v_mov_b32_e32 v1, v2
5848; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5849; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5850; GFX6-NEXT:    s_waitcnt vmcnt(0)
5851; GFX6-NEXT:    buffer_wbinvl1
5852; GFX6-NEXT:    s_endpgm
5853;
5854; GFX7-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5855; GFX7:       ; %bb.0: ; %entry
5856; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
5857; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
5858; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
5859; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
5860; GFX7-NEXT:    s_mov_b64 s[10:11], 16
5861; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5862; GFX7-NEXT:    s_mov_b32 s4, s8
5863; GFX7-NEXT:    s_mov_b32 s5, s9
5864; GFX7-NEXT:    s_mov_b32 s9, s10
5865; GFX7-NEXT:    s_mov_b32 s8, s11
5866; GFX7-NEXT:    s_add_u32 s4, s4, s9
5867; GFX7-NEXT:    s_addc_u32 s8, s5, s8
5868; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
5869; GFX7-NEXT:    s_mov_b32 s5, s8
5870; GFX7-NEXT:    v_mov_b32_e32 v2, s7
5871; GFX7-NEXT:    v_mov_b32_e32 v0, s6
5872; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5873; GFX7-NEXT:    v_mov_b32_e32 v3, v0
5874; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5875; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5876; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5877; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
5878; GFX7-NEXT:    s_waitcnt vmcnt(0)
5879; GFX7-NEXT:    buffer_wbinvl1_vol
5880; GFX7-NEXT:    s_endpgm
5881;
5882; GFX10-WGP-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5883; GFX10-WGP:       ; %bb.0: ; %entry
5884; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
5885; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5886; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
5887; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
5888; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5889; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
5890; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
5891; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5892; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
5893; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5894; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5895; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5896; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5897; GFX10-WGP-NEXT:    buffer_gl1_inv
5898; GFX10-WGP-NEXT:    buffer_gl0_inv
5899; GFX10-WGP-NEXT:    s_endpgm
5900;
5901; GFX10-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5902; GFX10-CU:       ; %bb.0: ; %entry
5903; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
5904; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5905; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
5906; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
5907; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
5908; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
5909; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
5910; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5911; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
5912; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5913; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5914; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5915; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5916; GFX10-CU-NEXT:    buffer_gl1_inv
5917; GFX10-CU-NEXT:    buffer_gl0_inv
5918; GFX10-CU-NEXT:    s_endpgm
5919;
5920; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5921; SKIP-CACHE-INV:       ; %bb.0: ; %entry
5922; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
5923; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
5924; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
5925; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
5926; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
5927; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
5928; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
5929; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
5930; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
5931; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
5932; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
5933; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
5934; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
5935; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
5936; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
5937; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5938; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
5939; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5940; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5941; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
5942; SKIP-CACHE-INV-NEXT:    s_endpgm
5943;
5944; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5945; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
5946; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5947; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5948; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5949; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5950; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5951; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5952; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5953; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5954; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5955; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5956; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5957; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5958; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
5959; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
5960;
5961; GFX90A-TGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5962; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
5963; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5964; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5965; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5966; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5967; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5968; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5969; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5970; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5971; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5972; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5973; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5974; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5975; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
5976; GFX90A-TGSPLIT-NEXT:    s_endpgm
5977;
5978; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5979; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
5980; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5981; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5982; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5983; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5984; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5985; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5986; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5987; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5988; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5989; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
5990; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5991; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
5992; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5993; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
5994; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
5995;
5996; GFX940-TGSPLIT-LABEL: global_agent_acquire_seq_cst_cmpxchg:
5997; GFX940-TGSPLIT:       ; %bb.0: ; %entry
5998; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5999; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6000; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6001; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6002; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6003; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6004; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6005; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6006; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6007; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
6008; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6009; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
6010; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6011; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
6012; GFX940-TGSPLIT-NEXT:    s_endpgm
6013;
6014; GFX11-WGP-LABEL: global_agent_acquire_seq_cst_cmpxchg:
6015; GFX11-WGP:       ; %bb.0: ; %entry
6016; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
6017; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6018; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6019; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6020; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6021; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
6022; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
6023; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6024; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
6025; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6026; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6027; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
6028; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6029; GFX11-WGP-NEXT:    buffer_gl1_inv
6030; GFX11-WGP-NEXT:    buffer_gl0_inv
6031; GFX11-WGP-NEXT:    s_endpgm
6032;
6033; GFX11-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg:
6034; GFX11-CU:       ; %bb.0: ; %entry
6035; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
6036; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6037; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6038; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6039; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
6040; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
6041; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
6042; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6043; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
6044; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6045; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6046; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
6047; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6048; GFX11-CU-NEXT:    buffer_gl1_inv
6049; GFX11-CU-NEXT:    buffer_gl0_inv
6050; GFX11-CU-NEXT:    s_endpgm
6051;
6052; GFX12-WGP-LABEL: global_agent_acquire_seq_cst_cmpxchg:
6053; GFX12-WGP:       ; %bb.0: ; %entry
6054; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
6055; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6056; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6057; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6058; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
6059; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
6060; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
6061; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6062; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
6063; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
6064; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
6065; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6066; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
6067; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
6068; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6069; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
6070; GFX12-WGP-NEXT:    s_endpgm
6071;
6072; GFX12-CU-LABEL: global_agent_acquire_seq_cst_cmpxchg:
6073; GFX12-CU:       ; %bb.0: ; %entry
6074; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
6075; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6076; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6077; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6078; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
6079; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
6080; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
6081; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6082; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
6083; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
6084; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
6085; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6086; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
6087; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
6088; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6089; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
6090; GFX12-CU-NEXT:    s_endpgm
6091    ptr addrspace(1) %out, i32 %in, i32 %old) {
6092entry:
6093  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
6094  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst
6095  ret void
6096}
6097
; Checks the code generated for a volatile cmpxchg on global memory
; (addrspace(1)) at syncscope("agent") with success ordering release and
; failure ordering seq_cst. The result of the cmpxchg is unused.
; The per-target assertions below are autogenerated (see the NOTE at the top
; of this file); regenerate them with utils/update_llc_test_checks.py instead
; of editing them by hand.
; Every configuration expects a wait before the atomic and another after it;
; all of them except the one built with -amdgcn-skip-cache-invalidations also
; expect a cache invalidate instruction after the trailing wait.
6098define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg(
6099; GFX6-LABEL: global_agent_release_seq_cst_cmpxchg:
6100; GFX6:       ; %bb.0: ; %entry
6101; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
6102; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6103; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
6104; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
6105; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6106; GFX6-NEXT:    s_mov_b32 s12, s5
6107; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
6108; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
6109; GFX6-NEXT:    s_mov_b32 s11, -1
6110; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
6111; GFX6-NEXT:    s_mov_b32 s5, s12
6112; GFX6-NEXT:    s_mov_b32 s6, s11
6113; GFX6-NEXT:    s_mov_b32 s7, s10
6114; GFX6-NEXT:    v_mov_b32_e32 v0, s9
6115; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6116; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6117; GFX6-NEXT:    v_mov_b32_e32 v1, v2
6118; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6119; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
6120; GFX6-NEXT:    s_waitcnt vmcnt(0)
6121; GFX6-NEXT:    buffer_wbinvl1
6122; GFX6-NEXT:    s_endpgm
6123;
6124; GFX7-LABEL: global_agent_release_seq_cst_cmpxchg:
6125; GFX7:       ; %bb.0: ; %entry
6126; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
6127; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
6128; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
6129; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
6130; GFX7-NEXT:    s_mov_b64 s[10:11], 16
6131; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
6132; GFX7-NEXT:    s_mov_b32 s4, s8
6133; GFX7-NEXT:    s_mov_b32 s5, s9
6134; GFX7-NEXT:    s_mov_b32 s9, s10
6135; GFX7-NEXT:    s_mov_b32 s8, s11
6136; GFX7-NEXT:    s_add_u32 s4, s4, s9
6137; GFX7-NEXT:    s_addc_u32 s8, s5, s8
6138; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
6139; GFX7-NEXT:    s_mov_b32 s5, s8
6140; GFX7-NEXT:    v_mov_b32_e32 v2, s7
6141; GFX7-NEXT:    v_mov_b32_e32 v0, s6
6142; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6143; GFX7-NEXT:    v_mov_b32_e32 v3, v0
6144; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6145; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6146; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6147; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
6148; GFX7-NEXT:    s_waitcnt vmcnt(0)
6149; GFX7-NEXT:    buffer_wbinvl1_vol
6150; GFX7-NEXT:    s_endpgm
6151;
6152; GFX10-WGP-LABEL: global_agent_release_seq_cst_cmpxchg:
6153; GFX10-WGP:       ; %bb.0: ; %entry
6154; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
6155; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6156; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
6157; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
6158; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6159; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
6160; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
6161; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6162; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
6163; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6164; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6165; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
6166; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6167; GFX10-WGP-NEXT:    buffer_gl1_inv
6168; GFX10-WGP-NEXT:    buffer_gl0_inv
6169; GFX10-WGP-NEXT:    s_endpgm
6170;
6171; GFX10-CU-LABEL: global_agent_release_seq_cst_cmpxchg:
6172; GFX10-CU:       ; %bb.0: ; %entry
6173; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
6174; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6175; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
6176; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
6177; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
6178; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
6179; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
6180; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6181; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
6182; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6183; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6184; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
6185; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6186; GFX10-CU-NEXT:    buffer_gl1_inv
6187; GFX10-CU-NEXT:    buffer_gl0_inv
6188; GFX10-CU-NEXT:    s_endpgm
6189;
6190; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_cmpxchg:
6191; SKIP-CACHE-INV:       ; %bb.0: ; %entry
6192; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
6193; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
6194; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
6195; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
6196; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
6197; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
6198; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
6199; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
6200; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
6201; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
6202; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
6203; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
6204; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
6205; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
6206; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
6207; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6208; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
6209; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6210; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
6211; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6212; SKIP-CACHE-INV-NEXT:    s_endpgm
6213;
6214; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg:
6215; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
6216; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6217; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6218; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6219; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6220; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6221; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6222; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6223; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6224; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6225; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6226; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
6227; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6228; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
6229; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
6230;
6231; GFX90A-TGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg:
6232; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
6233; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6234; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6235; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6236; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6237; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6238; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6239; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6240; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6241; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6242; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6243; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
6244; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6245; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
6246; GFX90A-TGSPLIT-NEXT:    s_endpgm
6247;
6248; GFX940-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg:
6249; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
6250; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6251; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6252; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6253; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6254; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6255; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6256; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6257; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6258; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6259; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
6260; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6261; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
6262; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6263; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
6264; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
6265;
6266; GFX940-TGSPLIT-LABEL: global_agent_release_seq_cst_cmpxchg:
6267; GFX940-TGSPLIT:       ; %bb.0: ; %entry
6268; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6269; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6270; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6271; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6272; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6273; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6274; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6275; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6276; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6277; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
6278; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6279; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
6280; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6281; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
6282; GFX940-TGSPLIT-NEXT:    s_endpgm
6283;
6284; GFX11-WGP-LABEL: global_agent_release_seq_cst_cmpxchg:
6285; GFX11-WGP:       ; %bb.0: ; %entry
6286; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
6287; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6288; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6289; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6290; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6291; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
6292; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
6293; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6294; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
6295; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6296; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6297; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
6298; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6299; GFX11-WGP-NEXT:    buffer_gl1_inv
6300; GFX11-WGP-NEXT:    buffer_gl0_inv
6301; GFX11-WGP-NEXT:    s_endpgm
6302;
6303; GFX11-CU-LABEL: global_agent_release_seq_cst_cmpxchg:
6304; GFX11-CU:       ; %bb.0: ; %entry
6305; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
6306; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6307; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6308; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6309; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
6310; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
6311; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
6312; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6313; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
6314; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6315; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6316; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
6317; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6318; GFX11-CU-NEXT:    buffer_gl1_inv
6319; GFX11-CU-NEXT:    buffer_gl0_inv
6320; GFX11-CU-NEXT:    s_endpgm
6321;
6322; GFX12-WGP-LABEL: global_agent_release_seq_cst_cmpxchg:
6323; GFX12-WGP:       ; %bb.0: ; %entry
6324; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
6325; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6326; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6327; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6328; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
6329; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
6330; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
6331; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6332; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
6333; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
6334; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
6335; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6336; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
6337; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
6338; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6339; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
6340; GFX12-WGP-NEXT:    s_endpgm
6341;
6342; GFX12-CU-LABEL: global_agent_release_seq_cst_cmpxchg:
6343; GFX12-CU:       ; %bb.0: ; %entry
6344; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
6345; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6346; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6347; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6348; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
6349; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
6350; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
6351; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6352; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
6353; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
6354; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
6355; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6356; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
6357; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
6358; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6359; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
6360; GFX12-CU-NEXT:    s_endpgm
6361    ptr addrspace(1) %out, i32 %in, i32 %old) {
6362entry:
  ; i32 index 4 is a 16-byte displacement from %out; the assertions above show
  ; it as an immediate offset of 16 on the atomic (or, in the flat-addressed
  ; variant, as the constant 16 added to the base address beforehand).
6363  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; %old is the expected (compare) value and %in is the replacement value.
  ; The cmpxchg result %val is intentionally left unused.
6364  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst
6365  ret void
6366}
6367
6368define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg(
6369; GFX6-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6370; GFX6:       ; %bb.0: ; %entry
6371; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
6372; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6373; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
6374; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
6375; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6376; GFX6-NEXT:    s_mov_b32 s12, s5
6377; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
6378; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
6379; GFX6-NEXT:    s_mov_b32 s11, -1
6380; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
6381; GFX6-NEXT:    s_mov_b32 s5, s12
6382; GFX6-NEXT:    s_mov_b32 s6, s11
6383; GFX6-NEXT:    s_mov_b32 s7, s10
6384; GFX6-NEXT:    v_mov_b32_e32 v0, s9
6385; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6386; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6387; GFX6-NEXT:    v_mov_b32_e32 v1, v2
6388; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6389; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
6390; GFX6-NEXT:    s_waitcnt vmcnt(0)
6391; GFX6-NEXT:    buffer_wbinvl1
6392; GFX6-NEXT:    s_endpgm
6393;
6394; GFX7-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6395; GFX7:       ; %bb.0: ; %entry
6396; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
6397; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
6398; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
6399; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
6400; GFX7-NEXT:    s_mov_b64 s[10:11], 16
6401; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
6402; GFX7-NEXT:    s_mov_b32 s4, s8
6403; GFX7-NEXT:    s_mov_b32 s5, s9
6404; GFX7-NEXT:    s_mov_b32 s9, s10
6405; GFX7-NEXT:    s_mov_b32 s8, s11
6406; GFX7-NEXT:    s_add_u32 s4, s4, s9
6407; GFX7-NEXT:    s_addc_u32 s8, s5, s8
6408; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
6409; GFX7-NEXT:    s_mov_b32 s5, s8
6410; GFX7-NEXT:    v_mov_b32_e32 v2, s7
6411; GFX7-NEXT:    v_mov_b32_e32 v0, s6
6412; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6413; GFX7-NEXT:    v_mov_b32_e32 v3, v0
6414; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6415; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6416; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6417; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
6418; GFX7-NEXT:    s_waitcnt vmcnt(0)
6419; GFX7-NEXT:    buffer_wbinvl1_vol
6420; GFX7-NEXT:    s_endpgm
6421;
6422; GFX10-WGP-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6423; GFX10-WGP:       ; %bb.0: ; %entry
6424; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
6425; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6426; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
6427; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
6428; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6429; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
6430; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
6431; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6432; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
6433; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6434; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6435; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
6436; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6437; GFX10-WGP-NEXT:    buffer_gl1_inv
6438; GFX10-WGP-NEXT:    buffer_gl0_inv
6439; GFX10-WGP-NEXT:    s_endpgm
6440;
6441; GFX10-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6442; GFX10-CU:       ; %bb.0: ; %entry
6443; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
6444; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6445; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
6446; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
6447; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
6448; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
6449; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
6450; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6451; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
6452; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6453; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6454; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
6455; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6456; GFX10-CU-NEXT:    buffer_gl1_inv
6457; GFX10-CU-NEXT:    buffer_gl0_inv
6458; GFX10-CU-NEXT:    s_endpgm
6459;
6460; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6461; SKIP-CACHE-INV:       ; %bb.0: ; %entry
6462; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
6463; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
6464; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
6465; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
6466; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
6467; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
6468; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
6469; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
6470; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
6471; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
6472; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
6473; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
6474; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
6475; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
6476; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
6477; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6478; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
6479; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6480; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
6481; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6482; SKIP-CACHE-INV-NEXT:    s_endpgm
6483;
6484; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6485; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
6486; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6487; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6488; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6489; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6490; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6491; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6492; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6493; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6494; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6495; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6496; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
6497; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6498; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
6499; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
6500;
6501; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6502; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
6503; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6504; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6505; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6506; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6507; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6508; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6509; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6510; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6511; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6512; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6513; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
6514; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6515; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
6516; GFX90A-TGSPLIT-NEXT:    s_endpgm
6517;
6518; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6519; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
6520; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6521; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6522; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6523; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6524; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6525; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6526; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6527; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6528; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6529; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
6530; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6531; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
6532; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6533; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
6534; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
6535;
6536; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6537; GFX940-TGSPLIT:       ; %bb.0: ; %entry
6538; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6539; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6540; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6541; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6542; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6543; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6544; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6545; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6546; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6547; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
6548; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6549; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
6550; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6551; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
6552; GFX940-TGSPLIT-NEXT:    s_endpgm
6553;
6554; GFX11-WGP-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6555; GFX11-WGP:       ; %bb.0: ; %entry
6556; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
6557; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6558; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6559; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6560; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6561; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
6562; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
6563; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6564; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
6565; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6566; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6567; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
6568; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6569; GFX11-WGP-NEXT:    buffer_gl1_inv
6570; GFX11-WGP-NEXT:    buffer_gl0_inv
6571; GFX11-WGP-NEXT:    s_endpgm
6572;
6573; GFX11-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6574; GFX11-CU:       ; %bb.0: ; %entry
6575; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
6576; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6577; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6578; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6579; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
6580; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
6581; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
6582; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6583; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
6584; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6585; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6586; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
6587; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6588; GFX11-CU-NEXT:    buffer_gl1_inv
6589; GFX11-CU-NEXT:    buffer_gl0_inv
6590; GFX11-CU-NEXT:    s_endpgm
6591;
6592; GFX12-WGP-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6593; GFX12-WGP:       ; %bb.0: ; %entry
6594; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
6595; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6596; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6597; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6598; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
6599; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
6600; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
6601; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6602; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
6603; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
6604; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
6605; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6606; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
6607; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
6608; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6609; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
6610; GFX12-WGP-NEXT:    s_endpgm
6611;
6612; GFX12-CU-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
6613; GFX12-CU:       ; %bb.0: ; %entry
6614; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
6615; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6616; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6617; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6618; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
6619; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
6620; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
6621; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6622; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
6623; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
6624; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
6625; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6626; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
6627; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
6628; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6629; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
6630; GFX12-CU-NEXT:    s_endpgm
6631    ptr addrspace(1) %out, i32 %in, i32 %old) {
6632entry:
6633  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
6634  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst
6635  ret void
6636}
6637
; Test: cmpxchg on an addrspace(1) pointer at syncscope("agent") with seq_cst
; success ordering and seq_cst failure ordering; the cmpxchg result is unused.
; The per-target check lines below are autogenerated (see the NOTE at the top
; of this file) - regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
; In the expected output every run waits on outstanding memory operations
; before the atomic and waits again after it; every run except the
; skip-cache-invalidations one then issues a cache invalidate before s_endpgm.
6638define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
6639; GFX6-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6640; GFX6:       ; %bb.0: ; %entry
6641; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
6642; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6643; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
6644; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
6645; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6646; GFX6-NEXT:    s_mov_b32 s12, s5
6647; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
6648; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
6649; GFX6-NEXT:    s_mov_b32 s11, -1
6650; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
6651; GFX6-NEXT:    s_mov_b32 s5, s12
6652; GFX6-NEXT:    s_mov_b32 s6, s11
6653; GFX6-NEXT:    s_mov_b32 s7, s10
6654; GFX6-NEXT:    v_mov_b32_e32 v0, s9
6655; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6656; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6657; GFX6-NEXT:    v_mov_b32_e32 v1, v2
6658; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6659; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
6660; GFX6-NEXT:    s_waitcnt vmcnt(0)
6661; GFX6-NEXT:    buffer_wbinvl1
6662; GFX6-NEXT:    s_endpgm
6663;
6664; GFX7-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6665; GFX7:       ; %bb.0: ; %entry
6666; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
6667; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
6668; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
6669; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
6670; GFX7-NEXT:    s_mov_b64 s[10:11], 16
6671; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
6672; GFX7-NEXT:    s_mov_b32 s4, s8
6673; GFX7-NEXT:    s_mov_b32 s5, s9
6674; GFX7-NEXT:    s_mov_b32 s9, s10
6675; GFX7-NEXT:    s_mov_b32 s8, s11
6676; GFX7-NEXT:    s_add_u32 s4, s4, s9
6677; GFX7-NEXT:    s_addc_u32 s8, s5, s8
6678; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
6679; GFX7-NEXT:    s_mov_b32 s5, s8
6680; GFX7-NEXT:    v_mov_b32_e32 v2, s7
6681; GFX7-NEXT:    v_mov_b32_e32 v0, s6
6682; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6683; GFX7-NEXT:    v_mov_b32_e32 v3, v0
6684; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6685; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6686; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6687; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
6688; GFX7-NEXT:    s_waitcnt vmcnt(0)
6689; GFX7-NEXT:    buffer_wbinvl1_vol
6690; GFX7-NEXT:    s_endpgm
6691;
6692; GFX10-WGP-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6693; GFX10-WGP:       ; %bb.0: ; %entry
6694; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
6695; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6696; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
6697; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
6698; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6699; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
6700; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
6701; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6702; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
6703; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6704; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6705; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
6706; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6707; GFX10-WGP-NEXT:    buffer_gl1_inv
6708; GFX10-WGP-NEXT:    buffer_gl0_inv
6709; GFX10-WGP-NEXT:    s_endpgm
6710;
6711; GFX10-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6712; GFX10-CU:       ; %bb.0: ; %entry
6713; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
6714; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6715; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
6716; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
6717; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
6718; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
6719; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
6720; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6721; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
6722; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6723; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6724; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
6725; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6726; GFX10-CU-NEXT:    buffer_gl1_inv
6727; GFX10-CU-NEXT:    buffer_gl0_inv
6728; GFX10-CU-NEXT:    s_endpgm
6729;
6730; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6731; SKIP-CACHE-INV:       ; %bb.0: ; %entry
6732; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
6733; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
6734; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
6735; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
6736; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
6737; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
6738; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
6739; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
6740; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
6741; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
6742; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
6743; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
6744; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
6745; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
6746; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
6747; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6748; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
6749; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6750; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
6751; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6752; SKIP-CACHE-INV-NEXT:    s_endpgm
6753;
6754; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6755; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
6756; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6757; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6758; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6759; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6760; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6761; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6762; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6763; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6764; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6765; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6766; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
6767; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6768; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
6769; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
6770;
6771; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6772; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
6773; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6774; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6775; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6776; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6777; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6778; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6779; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6780; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6781; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6782; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6783; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
6784; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6785; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
6786; GFX90A-TGSPLIT-NEXT:    s_endpgm
6787;
6788; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6789; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
6790; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6791; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6792; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6793; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6794; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6795; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6796; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6797; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6798; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6799; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
6800; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6801; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
6802; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6803; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
6804; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
6805;
6806; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6807; GFX940-TGSPLIT:       ; %bb.0: ; %entry
6808; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6809; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6810; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6811; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6812; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6813; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6814; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6815; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6816; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6817; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
6818; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6819; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
6820; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6821; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
6822; GFX940-TGSPLIT-NEXT:    s_endpgm
6823;
6824; GFX11-WGP-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6825; GFX11-WGP:       ; %bb.0: ; %entry
6826; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
6827; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6828; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6829; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6830; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6831; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
6832; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
6833; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6834; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
6835; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6836; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6837; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
6838; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6839; GFX11-WGP-NEXT:    buffer_gl1_inv
6840; GFX11-WGP-NEXT:    buffer_gl0_inv
6841; GFX11-WGP-NEXT:    s_endpgm
6842;
6843; GFX11-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6844; GFX11-CU:       ; %bb.0: ; %entry
6845; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
6846; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6847; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6848; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6849; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
6850; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
6851; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
6852; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6853; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
6854; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6855; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6856; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
6857; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6858; GFX11-CU-NEXT:    buffer_gl1_inv
6859; GFX11-CU-NEXT:    buffer_gl0_inv
6860; GFX11-CU-NEXT:    s_endpgm
6861;
6862; GFX12-WGP-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6863; GFX12-WGP:       ; %bb.0: ; %entry
6864; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
6865; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6866; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6867; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6868; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
6869; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
6870; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
6871; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6872; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
6873; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
6874; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
6875; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6876; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
6877; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
6878; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6879; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
6880; GFX12-WGP-NEXT:    s_endpgm
6881;
6882; GFX12-CU-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
6883; GFX12-CU:       ; %bb.0: ; %entry
6884; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
6885; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6886; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6887; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6888; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
6889; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
6890; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
6891; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6892; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
6893; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
6894; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
6895; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6896; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
6897; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
6898; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6899; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
6900; GFX12-CU-NEXT:    s_endpgm
6901    ptr addrspace(1) %out, i32 %in, i32 %old) {
6902entry:
; Address of i32 element 4 of %out, i.e. byte offset 16 - this is the
; offset:16 operand (or the explicit add of 16 on GFX7) in the checks above.
6903  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare against %old and swap in %in. The result %val is never read, so the
; expected output uses the non-returning form of the atomic.
6904  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
6905  ret void
6906}
6907
; Test: cmpxchg on an addrspace(1) pointer at syncscope("agent") with monotonic
; success ordering and monotonic failure ordering, where the loaded value is
; used (stored back to %out).
; The per-target check lines below are autogenerated (see the NOTE at the top
; of this file) - regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
; In the expected output the atomic is the returning form (glc, sc0 or
; th:TH_ATOMIC_RETURN depending on the target), it is followed only by a wait
; for its result before the store, and no run emits a cache invalidate.
6908define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg(
6909; GFX6-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
6910; GFX6:       ; %bb.0: ; %entry
6911; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
6912; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6913; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
6914; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
6915; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6916; GFX6-NEXT:    s_mov_b32 s12, s5
6917; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
6918; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
6919; GFX6-NEXT:    s_mov_b32 s11, -1
6920; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
6921; GFX6-NEXT:    s_mov_b32 s5, s12
6922; GFX6-NEXT:    s_mov_b32 s6, s11
6923; GFX6-NEXT:    s_mov_b32 s7, s10
6924; GFX6-NEXT:    v_mov_b32_e32 v0, s9
6925; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6926; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6927; GFX6-NEXT:    v_mov_b32_e32 v1, v2
6928; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
6929; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
6930; GFX6-NEXT:    s_waitcnt vmcnt(0)
6931; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
6932; GFX6-NEXT:    s_endpgm
6933;
6934; GFX7-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
6935; GFX7:       ; %bb.0: ; %entry
6936; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
6937; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6938; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
6939; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
6940; GFX7-NEXT:    s_mov_b64 s[12:13], 16
6941; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
6942; GFX7-NEXT:    s_mov_b32 s6, s4
6943; GFX7-NEXT:    s_mov_b32 s7, s5
6944; GFX7-NEXT:    s_mov_b32 s11, s12
6945; GFX7-NEXT:    s_mov_b32 s10, s13
6946; GFX7-NEXT:    s_add_u32 s6, s6, s11
6947; GFX7-NEXT:    s_addc_u32 s10, s7, s10
6948; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
6949; GFX7-NEXT:    s_mov_b32 s7, s10
6950; GFX7-NEXT:    v_mov_b32_e32 v2, s9
6951; GFX7-NEXT:    v_mov_b32_e32 v0, s8
6952; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6953; GFX7-NEXT:    v_mov_b32_e32 v3, v0
6954; GFX7-NEXT:    v_mov_b32_e32 v0, s6
6955; GFX7-NEXT:    v_mov_b32_e32 v1, s7
6956; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
6957; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6958; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6959; GFX7-NEXT:    s_waitcnt vmcnt(0)
6960; GFX7-NEXT:    flat_store_dword v[0:1], v2
6961; GFX7-NEXT:    s_endpgm
6962;
6963; GFX10-WGP-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
6964; GFX10-WGP:       ; %bb.0: ; %entry
6965; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
6966; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6967; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
6968; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
6969; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6970; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
6971; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
6972; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6973; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
6974; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6975; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
6976; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
6977; GFX10-WGP-NEXT:    s_endpgm
6978;
6979; GFX10-CU-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
6980; GFX10-CU:       ; %bb.0: ; %entry
6981; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
6982; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6983; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
6984; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
6985; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
6986; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
6987; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
6988; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6989; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
6990; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6991; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
6992; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
6993; GFX10-CU-NEXT:    s_endpgm
6994;
6995; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
6996; SKIP-CACHE-INV:       ; %bb.0: ; %entry
6997; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
6998; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
6999; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
7000; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
7001; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
7002; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
7003; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
7004; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
7005; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
7006; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
7007; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
7008; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
7009; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
7010; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
7011; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
7012; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7013; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
7014; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7015; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7016; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7017; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
7018; SKIP-CACHE-INV-NEXT:    s_endpgm
7019;
7020; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
7021; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
7022; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7023; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7024; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7025; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7026; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7027; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7028; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7029; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7030; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7031; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7032; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7033; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7034; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
7035;
7036; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
7037; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
7038; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7039; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7040; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7041; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7042; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7043; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7044; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7045; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7046; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7047; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7048; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7049; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7050; GFX90A-TGSPLIT-NEXT:    s_endpgm
7051;
7052; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
7053; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
7054; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7055; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7056; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7057; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7058; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7059; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7060; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7061; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7062; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7063; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7064; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7065; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7066; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
7067;
7068; GFX940-TGSPLIT-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
7069; GFX940-TGSPLIT:       ; %bb.0: ; %entry
7070; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7071; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7072; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7073; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7074; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7075; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7076; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7077; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7078; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7079; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7080; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7081; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7082; GFX940-TGSPLIT-NEXT:    s_endpgm
7083;
7084; GFX11-WGP-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
7085; GFX11-WGP:       ; %bb.0: ; %entry
7086; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
7087; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7088; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7089; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7090; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7091; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
7092; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
7093; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7094; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
7095; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7096; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
7097; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7098; GFX11-WGP-NEXT:    s_endpgm
7099;
7100; GFX11-CU-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
7101; GFX11-CU:       ; %bb.0: ; %entry
7102; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
7103; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7104; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7105; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7106; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
7107; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
7108; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
7109; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7110; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
7111; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7112; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
7113; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7114; GFX11-CU-NEXT:    s_endpgm
7115;
7116; GFX12-WGP-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
7117; GFX12-WGP:       ; %bb.0: ; %entry
7118; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
7119; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7120; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7121; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7122; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
7123; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
7124; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
7125; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7126; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
7127; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7128; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
7129; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7130; GFX12-WGP-NEXT:    s_endpgm
7131;
7132; GFX12-CU-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
7133; GFX12-CU:       ; %bb.0: ; %entry
7134; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
7135; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7136; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7137; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7138; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
7139; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
7140; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
7141; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7142; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
7143; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7144; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
7145; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7146; GFX12-CU-NEXT:    s_endpgm
7147    ptr addrspace(1) %out, i32 %in, i32 %old) {
7148entry:
; Address of i32 element 4 of %out, i.e. byte offset 16 - this is the
; offset:16 operand (or the explicit add of 16 on GFX7) in the checks above.
7149  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare against %old and swap in %in with monotonic/monotonic ordering.
7150  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic
; Field 0 of the { i32, i1 } result is the value loaded by the cmpxchg; storing
; it to %out keeps the result live, so the returning atomic form is expected.
7151  %val0 = extractvalue { i32, i1 } %val, 0
7152  store i32 %val0, ptr addrspace(1) %out, align 4
7153  ret void
7154}
7155
; Agent-scope cmpxchg with acquire success ordering and monotonic failure
; ordering on a global pointer (%out plus 4 x i32, i.e. byte offset 16). The
; value returned by the atomic is stored back to %out.
;
; The checks below expect each target to wait for the returning atomic right
; after it is issued and then to invalidate its caches before the result is
; stored. The run that passes -amdgcn-skip-cache-invalidations keeps the wait
; but drops the invalidate, so exactly one s_waitcnt vmcnt(0) is expected
; between its atomic and its store.
7156define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
7157; GFX6-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7158; GFX6:       ; %bb.0: ; %entry
7159; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
7160; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7161; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
7162; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
7163; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7164; GFX6-NEXT:    s_mov_b32 s12, s5
7165; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
7166; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
7167; GFX6-NEXT:    s_mov_b32 s11, -1
7168; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
7169; GFX6-NEXT:    s_mov_b32 s5, s12
7170; GFX6-NEXT:    s_mov_b32 s6, s11
7171; GFX6-NEXT:    s_mov_b32 s7, s10
7172; GFX6-NEXT:    v_mov_b32_e32 v0, s9
7173; GFX6-NEXT:    v_mov_b32_e32 v2, s8
7174; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7175; GFX6-NEXT:    v_mov_b32_e32 v1, v2
7176; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7177; GFX6-NEXT:    s_waitcnt vmcnt(0)
7178; GFX6-NEXT:    buffer_wbinvl1
7179; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7180; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
7181; GFX6-NEXT:    s_endpgm
7182;
7183; GFX7-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7184; GFX7:       ; %bb.0: ; %entry
7185; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
7186; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7187; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
7188; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
7189; GFX7-NEXT:    s_mov_b64 s[12:13], 16
7190; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
7191; GFX7-NEXT:    s_mov_b32 s6, s4
7192; GFX7-NEXT:    s_mov_b32 s7, s5
7193; GFX7-NEXT:    s_mov_b32 s11, s12
7194; GFX7-NEXT:    s_mov_b32 s10, s13
7195; GFX7-NEXT:    s_add_u32 s6, s6, s11
7196; GFX7-NEXT:    s_addc_u32 s10, s7, s10
7197; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
7198; GFX7-NEXT:    s_mov_b32 s7, s10
7199; GFX7-NEXT:    v_mov_b32_e32 v2, s9
7200; GFX7-NEXT:    v_mov_b32_e32 v0, s8
7201; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7202; GFX7-NEXT:    v_mov_b32_e32 v3, v0
7203; GFX7-NEXT:    v_mov_b32_e32 v0, s6
7204; GFX7-NEXT:    v_mov_b32_e32 v1, s7
7205; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7206; GFX7-NEXT:    s_waitcnt vmcnt(0)
7207; GFX7-NEXT:    buffer_wbinvl1_vol
7208; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7209; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7210; GFX7-NEXT:    flat_store_dword v[0:1], v2
7211; GFX7-NEXT:    s_endpgm
7212;
7213; GFX10-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7214; GFX10-WGP:       ; %bb.0: ; %entry
7215; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
7216; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7217; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
7218; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
7219; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7220; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
7221; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
7222; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7223; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
7224; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7225; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
7226; GFX10-WGP-NEXT:    buffer_gl1_inv
7227; GFX10-WGP-NEXT:    buffer_gl0_inv
7228; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
7229; GFX10-WGP-NEXT:    s_endpgm
7230;
7231; GFX10-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7232; GFX10-CU:       ; %bb.0: ; %entry
7233; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
7234; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7235; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
7236; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
7237; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
7238; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
7239; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
7240; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7241; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
7242; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7243; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
7244; GFX10-CU-NEXT:    buffer_gl1_inv
7245; GFX10-CU-NEXT:    buffer_gl0_inv
7246; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
7247; GFX10-CU-NEXT:    s_endpgm
7248;
7249; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7250; SKIP-CACHE-INV:       ; %bb.0: ; %entry
7251; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
7252; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
7253; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
7254; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
7255; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
7256; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
7257; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
7258; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
7259; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
7260; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
7261; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
7262; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
7263; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
7264; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
7265; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
7266; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7267; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
7268; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7269; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7270; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7272; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
7273; SKIP-CACHE-INV-NEXT:    s_endpgm
7274;
7275; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7276; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
7277; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7278; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7279; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7280; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7281; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7282; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7283; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7284; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7285; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7286; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7287; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7288; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
7289; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7290; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
7291;
7292; GFX90A-TGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7293; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
7294; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7295; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7296; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7297; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7298; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7299; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7300; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7301; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7302; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7303; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7304; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7305; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
7306; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7307; GFX90A-TGSPLIT-NEXT:    s_endpgm
7308;
7309; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7310; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
7311; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7312; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7313; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7314; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7315; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7316; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7317; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7318; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7319; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7320; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7321; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7322; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
7323; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7324; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
7325;
7326; GFX940-TGSPLIT-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7327; GFX940-TGSPLIT:       ; %bb.0: ; %entry
7328; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7329; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7330; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7331; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7332; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7333; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7334; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7335; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7336; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7337; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7338; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7339; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
7340; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7341; GFX940-TGSPLIT-NEXT:    s_endpgm
7342;
7343; GFX11-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7344; GFX11-WGP:       ; %bb.0: ; %entry
7345; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
7346; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7347; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7348; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7349; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7350; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
7351; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
7352; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7353; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
7354; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7355; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
7356; GFX11-WGP-NEXT:    buffer_gl1_inv
7357; GFX11-WGP-NEXT:    buffer_gl0_inv
7358; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7359; GFX11-WGP-NEXT:    s_endpgm
7360;
7361; GFX11-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7362; GFX11-CU:       ; %bb.0: ; %entry
7363; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
7364; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7365; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7366; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7367; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
7368; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
7369; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
7370; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7371; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
7372; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7373; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
7374; GFX11-CU-NEXT:    buffer_gl1_inv
7375; GFX11-CU-NEXT:    buffer_gl0_inv
7376; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7377; GFX11-CU-NEXT:    s_endpgm
7378;
7379; GFX12-WGP-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7380; GFX12-WGP:       ; %bb.0: ; %entry
7381; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
7382; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7383; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7384; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7385; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
7386; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
7387; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
7388; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7389; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
7390; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7391; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
7392; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
7393; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7394; GFX12-WGP-NEXT:    s_endpgm
7395;
7396; GFX12-CU-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
7397; GFX12-CU:       ; %bb.0: ; %entry
7398; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
7399; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7400; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7401; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7402; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
7403; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
7404; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
7405; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7406; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
7407; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7408; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
7409; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
7410; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7411; GFX12-CU-NEXT:    s_endpgm
7412    ptr addrspace(1) %out, i32 %in, i32 %old) {
7413entry:
; Element 4 of an i32 array is byte offset 16, the offset the checks above expect.
7414  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
7415  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic
; Field 0 of the cmpxchg result is the value loaded from memory.
7416  %val0 = extractvalue { i32, i1 } %val, 0
7417  store i32 %val0, ptr addrspace(1) %out, align 4
7418  ret void
7419}
7420
; Agent-scope cmpxchg with release success ordering and monotonic failure
; ordering on a global pointer (%out plus 4 x i32, i.e. byte offset 16). The
; value returned by the atomic is stored back to %out.
;
; The checks below expect wait instructions immediately before the atomic on
; every target (the gfx940 runs also expect buffer_wbl2 sc1 ahead of that
; wait). No cache invalidate is expected after the atomic; the only wait that
; follows it precedes the store of the returned value.
7421define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg(
7422; GFX6-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7423; GFX6:       ; %bb.0: ; %entry
7424; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
7425; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7426; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
7427; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
7428; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7429; GFX6-NEXT:    s_mov_b32 s12, s5
7430; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
7431; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
7432; GFX6-NEXT:    s_mov_b32 s11, -1
7433; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
7434; GFX6-NEXT:    s_mov_b32 s5, s12
7435; GFX6-NEXT:    s_mov_b32 s6, s11
7436; GFX6-NEXT:    s_mov_b32 s7, s10
7437; GFX6-NEXT:    v_mov_b32_e32 v0, s9
7438; GFX6-NEXT:    v_mov_b32_e32 v2, s8
7439; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7440; GFX6-NEXT:    v_mov_b32_e32 v1, v2
7441; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7442; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7443; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7444; GFX6-NEXT:    s_waitcnt vmcnt(0)
7445; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
7446; GFX6-NEXT:    s_endpgm
7447;
7448; GFX7-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7449; GFX7:       ; %bb.0: ; %entry
7450; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
7451; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7452; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
7453; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
7454; GFX7-NEXT:    s_mov_b64 s[12:13], 16
7455; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
7456; GFX7-NEXT:    s_mov_b32 s6, s4
7457; GFX7-NEXT:    s_mov_b32 s7, s5
7458; GFX7-NEXT:    s_mov_b32 s11, s12
7459; GFX7-NEXT:    s_mov_b32 s10, s13
7460; GFX7-NEXT:    s_add_u32 s6, s6, s11
7461; GFX7-NEXT:    s_addc_u32 s10, s7, s10
7462; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
7463; GFX7-NEXT:    s_mov_b32 s7, s10
7464; GFX7-NEXT:    v_mov_b32_e32 v2, s9
7465; GFX7-NEXT:    v_mov_b32_e32 v0, s8
7466; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7467; GFX7-NEXT:    v_mov_b32_e32 v3, v0
7468; GFX7-NEXT:    v_mov_b32_e32 v0, s6
7469; GFX7-NEXT:    v_mov_b32_e32 v1, s7
7470; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7471; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7472; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7473; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7474; GFX7-NEXT:    s_waitcnt vmcnt(0)
7475; GFX7-NEXT:    flat_store_dword v[0:1], v2
7476; GFX7-NEXT:    s_endpgm
7477;
7478; GFX10-WGP-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7479; GFX10-WGP:       ; %bb.0: ; %entry
7480; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
7481; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7482; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
7483; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
7484; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7485; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
7486; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
7487; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7488; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
7489; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7490; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
7491; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7492; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
7493; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
7494; GFX10-WGP-NEXT:    s_endpgm
7495;
7496; GFX10-CU-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7497; GFX10-CU:       ; %bb.0: ; %entry
7498; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
7499; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7500; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
7501; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
7502; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
7503; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
7504; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
7505; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7506; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
7507; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7508; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
7509; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7510; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
7511; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
7512; GFX10-CU-NEXT:    s_endpgm
7513;
7514; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7515; SKIP-CACHE-INV:       ; %bb.0: ; %entry
7516; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
7517; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
7518; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
7519; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
7520; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
7521; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
7522; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
7523; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
7524; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
7525; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
7526; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
7527; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
7528; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
7529; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
7530; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
7531; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7532; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
7533; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7534; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7535; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7536; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7537; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
7538; SKIP-CACHE-INV-NEXT:    s_endpgm
7539;
7540; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7541; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
7542; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7543; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7544; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7545; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7546; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7547; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7548; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7549; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7550; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7551; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7552; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7553; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7554; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7555; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
7556;
7557; GFX90A-TGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7558; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
7559; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7560; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7561; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7562; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7563; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7564; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7565; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7566; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7567; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7568; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7569; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7570; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7571; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7572; GFX90A-TGSPLIT-NEXT:    s_endpgm
7573;
7574; GFX940-NOTTGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7575; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
7576; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7577; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7578; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7579; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7580; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7581; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7582; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7583; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7584; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7585; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
7586; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7587; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7588; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7589; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7590; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
7591;
7592; GFX940-TGSPLIT-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7593; GFX940-TGSPLIT:       ; %bb.0: ; %entry
7594; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7595; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7596; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7597; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7598; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7599; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7600; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7601; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7602; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7603; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
7604; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7605; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7606; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7607; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7608; GFX940-TGSPLIT-NEXT:    s_endpgm
7609;
7610; GFX11-WGP-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7611; GFX11-WGP:       ; %bb.0: ; %entry
7612; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
7613; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7614; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7615; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7616; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7617; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
7618; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
7619; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7620; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
7621; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7622; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
7623; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7624; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
7625; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7626; GFX11-WGP-NEXT:    s_endpgm
7627;
7628; GFX11-CU-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7629; GFX11-CU:       ; %bb.0: ; %entry
7630; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
7631; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7632; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7633; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7634; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
7635; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
7636; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
7637; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7638; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
7639; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7640; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
7641; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7642; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
7643; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7644; GFX11-CU-NEXT:    s_endpgm
7645;
7646; GFX12-WGP-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7647; GFX12-WGP:       ; %bb.0: ; %entry
7648; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
7649; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7650; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7651; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7652; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
7653; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
7654; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
7655; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7656; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
7657; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
7658; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
7659; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
7660; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
7661; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7662; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
7663; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7664; GFX12-WGP-NEXT:    s_endpgm
7665;
7666; GFX12-CU-LABEL: global_agent_release_monotonic_ret_cmpxchg:
7667; GFX12-CU:       ; %bb.0: ; %entry
7668; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
7669; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7670; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7671; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7672; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
7673; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
7674; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
7675; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7676; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
7677; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
7678; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
7679; GFX12-CU-NEXT:    s_wait_storecnt 0x0
7680; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
7681; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7682; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
7683; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7684; GFX12-CU-NEXT:    s_endpgm
7685    ptr addrspace(1) %out, i32 %in, i32 %old) {
7686entry:
; Element 4 of an i32 array is byte offset 16, the offset the checks above expect.
7687  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
7688  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release monotonic
; Field 0 of the cmpxchg result is the value loaded from memory.
7689  %val0 = extractvalue { i32, i1 } %val, 0
7690  store i32 %val0, ptr addrspace(1) %out, align 4
7691  ret void
7692}
7693
7694define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
7695; GFX6-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7696; GFX6:       ; %bb.0: ; %entry
7697; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
7698; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7699; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
7700; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
7701; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7702; GFX6-NEXT:    s_mov_b32 s12, s5
7703; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
7704; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
7705; GFX6-NEXT:    s_mov_b32 s11, -1
7706; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
7707; GFX6-NEXT:    s_mov_b32 s5, s12
7708; GFX6-NEXT:    s_mov_b32 s6, s11
7709; GFX6-NEXT:    s_mov_b32 s7, s10
7710; GFX6-NEXT:    v_mov_b32_e32 v0, s9
7711; GFX6-NEXT:    v_mov_b32_e32 v2, s8
7712; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7713; GFX6-NEXT:    v_mov_b32_e32 v1, v2
7714; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7715; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7716; GFX6-NEXT:    s_waitcnt vmcnt(0)
7717; GFX6-NEXT:    buffer_wbinvl1
7718; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7719; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
7720; GFX6-NEXT:    s_endpgm
7721;
7722; GFX7-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7723; GFX7:       ; %bb.0: ; %entry
7724; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
7725; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7726; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
7727; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
7728; GFX7-NEXT:    s_mov_b64 s[12:13], 16
7729; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
7730; GFX7-NEXT:    s_mov_b32 s6, s4
7731; GFX7-NEXT:    s_mov_b32 s7, s5
7732; GFX7-NEXT:    s_mov_b32 s11, s12
7733; GFX7-NEXT:    s_mov_b32 s10, s13
7734; GFX7-NEXT:    s_add_u32 s6, s6, s11
7735; GFX7-NEXT:    s_addc_u32 s10, s7, s10
7736; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
7737; GFX7-NEXT:    s_mov_b32 s7, s10
7738; GFX7-NEXT:    v_mov_b32_e32 v2, s9
7739; GFX7-NEXT:    v_mov_b32_e32 v0, s8
7740; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7741; GFX7-NEXT:    v_mov_b32_e32 v3, v0
7742; GFX7-NEXT:    v_mov_b32_e32 v0, s6
7743; GFX7-NEXT:    v_mov_b32_e32 v1, s7
7744; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7745; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7746; GFX7-NEXT:    s_waitcnt vmcnt(0)
7747; GFX7-NEXT:    buffer_wbinvl1_vol
7748; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7749; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7750; GFX7-NEXT:    flat_store_dword v[0:1], v2
7751; GFX7-NEXT:    s_endpgm
7752;
7753; GFX10-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7754; GFX10-WGP:       ; %bb.0: ; %entry
7755; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
7756; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7757; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
7758; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
7759; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7760; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
7761; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
7762; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7763; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
7764; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7765; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
7766; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7767; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
7768; GFX10-WGP-NEXT:    buffer_gl1_inv
7769; GFX10-WGP-NEXT:    buffer_gl0_inv
7770; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
7771; GFX10-WGP-NEXT:    s_endpgm
7772;
7773; GFX10-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7774; GFX10-CU:       ; %bb.0: ; %entry
7775; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
7776; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7777; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
7778; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
7779; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
7780; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
7781; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
7782; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7783; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
7784; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7785; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
7786; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7787; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
7788; GFX10-CU-NEXT:    buffer_gl1_inv
7789; GFX10-CU-NEXT:    buffer_gl0_inv
7790; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
7791; GFX10-CU-NEXT:    s_endpgm
7792;
7793; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7794; SKIP-CACHE-INV:       ; %bb.0: ; %entry
7795; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
7796; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
7797; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
7798; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
7799; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
7800; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
7801; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
7802; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
7803; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
7804; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
7805; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
7806; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
7807; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
7808; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
7809; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
7810; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7811; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
7812; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7813; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7814; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7815; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7816; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7817; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
7818; SKIP-CACHE-INV-NEXT:    s_endpgm
7819;
7820; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7821; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
7822; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7823; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7824; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7825; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7826; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7827; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7828; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7829; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7830; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7831; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7832; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7833; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7834; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
7835; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7836; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
7837;
7838; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7839; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
7840; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7841; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7842; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7843; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7844; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7845; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7846; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7847; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7848; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7849; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7850; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7851; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7852; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
7853; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7854; GFX90A-TGSPLIT-NEXT:    s_endpgm
7855;
7856; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7857; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
7858; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7859; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7860; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7861; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7862; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7863; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7864; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7865; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7866; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7867; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
7868; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7869; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7870; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7871; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
7872; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7873; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
7874;
7875; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7876; GFX940-TGSPLIT:       ; %bb.0: ; %entry
7877; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7878; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7879; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7880; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7881; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7882; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7883; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7884; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7885; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7886; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
7887; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7888; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
7889; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7890; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
7891; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7892; GFX940-TGSPLIT-NEXT:    s_endpgm
7893;
7894; GFX11-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7895; GFX11-WGP:       ; %bb.0: ; %entry
7896; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
7897; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7898; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7899; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7900; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7901; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
7902; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
7903; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7904; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
7905; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7906; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
7907; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7908; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
7909; GFX11-WGP-NEXT:    buffer_gl1_inv
7910; GFX11-WGP-NEXT:    buffer_gl0_inv
7911; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7912; GFX11-WGP-NEXT:    s_endpgm
7913;
7914; GFX11-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7915; GFX11-CU:       ; %bb.0: ; %entry
7916; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
7917; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7918; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7919; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7920; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
7921; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
7922; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
7923; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7924; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
7925; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7926; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
7927; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7928; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
7929; GFX11-CU-NEXT:    buffer_gl1_inv
7930; GFX11-CU-NEXT:    buffer_gl0_inv
7931; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7932; GFX11-CU-NEXT:    s_endpgm
7933;
7934; GFX12-WGP-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7935; GFX12-WGP:       ; %bb.0: ; %entry
7936; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
7937; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7938; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7939; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7940; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
7941; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
7942; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
7943; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7944; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
7945; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
7946; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
7947; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
7948; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
7949; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7950; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
7951; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
7952; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
7953; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
7954; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7955; GFX12-WGP-NEXT:    s_endpgm
7956;
7957; GFX12-CU-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
7958; GFX12-CU:       ; %bb.0: ; %entry
7959; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
7960; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7961; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7962; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7963; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
7964; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
7965; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
7966; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7967; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
7968; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
7969; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
7970; GFX12-CU-NEXT:    s_wait_storecnt 0x0
7971; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
7972; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
7973; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
7974; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
7975; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
7976; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
7977; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7978; GFX12-CU-NEXT:    s_endpgm
7979    ptr addrspace(1) %out, i32 %in, i32 %old) {
7980entry:
7981  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
7982  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic
7983  %val0 = extractvalue { i32, i1 } %val, 0
7984  store i32 %val0, ptr addrspace(1) %out, align 4
7985  ret void
7986}
7987
; Kernel under test: a value-returning, volatile cmpxchg on global memory
; (addrspace(1)) at syncscope("agent"), using seq_cst as the success ordering
; and monotonic as the failure ordering. The loaded value is written back to
; %out so that the returning form of the atomic is exercised.
; The assertion lines inside this function are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
7988define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
7989; GFX6-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
7990; GFX6:       ; %bb.0: ; %entry
7991; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
7992; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7993; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
7994; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
7995; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7996; GFX6-NEXT:    s_mov_b32 s12, s5
7997; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
7998; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
7999; GFX6-NEXT:    s_mov_b32 s11, -1
8000; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
8001; GFX6-NEXT:    s_mov_b32 s5, s12
8002; GFX6-NEXT:    s_mov_b32 s6, s11
8003; GFX6-NEXT:    s_mov_b32 s7, s10
8004; GFX6-NEXT:    v_mov_b32_e32 v0, s9
8005; GFX6-NEXT:    v_mov_b32_e32 v2, s8
8006; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8007; GFX6-NEXT:    v_mov_b32_e32 v1, v2
8008; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8009; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8010; GFX6-NEXT:    s_waitcnt vmcnt(0)
8011; GFX6-NEXT:    buffer_wbinvl1
8012; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8013; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
8014; GFX6-NEXT:    s_endpgm
8015;
8016; GFX7-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8017; GFX7:       ; %bb.0: ; %entry
8018; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
8019; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8020; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
8021; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
8022; GFX7-NEXT:    s_mov_b64 s[12:13], 16
8023; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8024; GFX7-NEXT:    s_mov_b32 s6, s4
8025; GFX7-NEXT:    s_mov_b32 s7, s5
8026; GFX7-NEXT:    s_mov_b32 s11, s12
8027; GFX7-NEXT:    s_mov_b32 s10, s13
8028; GFX7-NEXT:    s_add_u32 s6, s6, s11
8029; GFX7-NEXT:    s_addc_u32 s10, s7, s10
8030; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
8031; GFX7-NEXT:    s_mov_b32 s7, s10
8032; GFX7-NEXT:    v_mov_b32_e32 v2, s9
8033; GFX7-NEXT:    v_mov_b32_e32 v0, s8
8034; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8035; GFX7-NEXT:    v_mov_b32_e32 v3, v0
8036; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8037; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8038; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8039; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8040; GFX7-NEXT:    s_waitcnt vmcnt(0)
8041; GFX7-NEXT:    buffer_wbinvl1_vol
8042; GFX7-NEXT:    v_mov_b32_e32 v0, s4
8043; GFX7-NEXT:    v_mov_b32_e32 v1, s5
8044; GFX7-NEXT:    flat_store_dword v[0:1], v2
8045; GFX7-NEXT:    s_endpgm
8046;
8047; GFX10-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8048; GFX10-WGP:       ; %bb.0: ; %entry
8049; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
8050; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8051; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
8052; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
8053; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8054; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
8055; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
8056; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8057; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
8058; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8059; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8060; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8061; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
8062; GFX10-WGP-NEXT:    buffer_gl1_inv
8063; GFX10-WGP-NEXT:    buffer_gl0_inv
8064; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
8065; GFX10-WGP-NEXT:    s_endpgm
8066;
8067; GFX10-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8068; GFX10-CU:       ; %bb.0: ; %entry
8069; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
8070; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8071; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
8072; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
8073; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
8074; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
8075; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
8076; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8077; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
8078; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8079; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8080; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8081; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
8082; GFX10-CU-NEXT:    buffer_gl1_inv
8083; GFX10-CU-NEXT:    buffer_gl0_inv
8084; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
8085; GFX10-CU-NEXT:    s_endpgm
8086;
8087; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8088; SKIP-CACHE-INV:       ; %bb.0: ; %entry
8089; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
8090; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
8091; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
8092; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
8093; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
8094; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
8095; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
8096; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
8097; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
8098; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
8099; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
8100; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
8101; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
8102; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
8103; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
8104; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8105; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
8106; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8107; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8108; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8109; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8110; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8111; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
8112; SKIP-CACHE-INV-NEXT:    s_endpgm
8113;
8114; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8115; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
8116; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8117; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8118; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8119; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8120; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8121; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8122; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8123; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8124; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8125; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8126; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8127; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8128; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
8129; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8130; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
8131;
8132; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8133; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
8134; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8135; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8136; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8137; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8138; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8139; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8140; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8141; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8142; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8143; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8144; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8145; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8146; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
8147; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8148; GFX90A-TGSPLIT-NEXT:    s_endpgm
8149;
8150; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8151; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
8152; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8153; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8154; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8155; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8156; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8157; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8158; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8159; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8160; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8161; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
8162; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8163; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8164; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8165; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
8166; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8167; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
8168;
8169; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8170; GFX940-TGSPLIT:       ; %bb.0: ; %entry
8171; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8172; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8173; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8174; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8175; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8176; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8177; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8178; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8179; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8180; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
8181; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8182; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8183; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8184; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
8185; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8186; GFX940-TGSPLIT-NEXT:    s_endpgm
8187;
8188; GFX11-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8189; GFX11-WGP:       ; %bb.0: ; %entry
8190; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
8191; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8192; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8193; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8194; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8195; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
8196; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
8197; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8198; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
8199; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8200; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8201; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8202; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
8203; GFX11-WGP-NEXT:    buffer_gl1_inv
8204; GFX11-WGP-NEXT:    buffer_gl0_inv
8205; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8206; GFX11-WGP-NEXT:    s_endpgm
8207;
8208; GFX11-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8209; GFX11-CU:       ; %bb.0: ; %entry
8210; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
8211; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8212; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8213; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8214; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
8215; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
8216; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
8217; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8218; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
8219; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8220; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8221; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8222; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
8223; GFX11-CU-NEXT:    buffer_gl1_inv
8224; GFX11-CU-NEXT:    buffer_gl0_inv
8225; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8226; GFX11-CU-NEXT:    s_endpgm
8227;
8228; GFX12-WGP-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8229; GFX12-WGP:       ; %bb.0: ; %entry
8230; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
8231; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8232; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8233; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8234; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
8235; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
8236; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
8237; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8238; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
8239; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8240; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8241; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
8242; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
8243; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
8244; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8245; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8246; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
8247; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
8248; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8249; GFX12-WGP-NEXT:    s_endpgm
8250;
8251; GFX12-CU-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
8252; GFX12-CU:       ; %bb.0: ; %entry
8253; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
8254; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8255; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8256; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8257; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
8258; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
8259; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
8260; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8261; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
8262; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8263; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8264; GFX12-CU-NEXT:    s_wait_storecnt 0x0
8265; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
8266; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
8267; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8268; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8269; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
8270; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
8271; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8272; GFX12-CU-NEXT:    s_endpgm
8273    ptr addrspace(1) %out, i32 %in, i32 %old) {
8274entry:
  ; Address of the i32 element at index 4 past %out; the expected code above
  ; reaches it with a 16-byte offset from the base pointer.
8275  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Volatile compare-and-swap at agent scope: %old is the comparand and %in is
  ; the replacement value; seq_cst applies on success, monotonic on failure.
8276  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic
  ; Field 0 of the { i32, i1 } result is the i32 half of the pair; the i1 half
  ; is left unused.
8277  %val0 = extractvalue { i32, i1 } %val, 0
  ; Store the i32 result to the base of %out (not to the %gep address).
8278  store i32 %val0, ptr addrspace(1) %out, align 4
8279  ret void
8280}
8281
8282define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg(
8283; GFX6-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8284; GFX6:       ; %bb.0: ; %entry
8285; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
8286; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8287; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
8288; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
8289; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
8290; GFX6-NEXT:    s_mov_b32 s12, s5
8291; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
8292; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
8293; GFX6-NEXT:    s_mov_b32 s11, -1
8294; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
8295; GFX6-NEXT:    s_mov_b32 s5, s12
8296; GFX6-NEXT:    s_mov_b32 s6, s11
8297; GFX6-NEXT:    s_mov_b32 s7, s10
8298; GFX6-NEXT:    v_mov_b32_e32 v0, s9
8299; GFX6-NEXT:    v_mov_b32_e32 v2, s8
8300; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8301; GFX6-NEXT:    v_mov_b32_e32 v1, v2
8302; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8303; GFX6-NEXT:    s_waitcnt vmcnt(0)
8304; GFX6-NEXT:    buffer_wbinvl1
8305; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8306; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
8307; GFX6-NEXT:    s_endpgm
8308;
8309; GFX7-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8310; GFX7:       ; %bb.0: ; %entry
8311; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
8312; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8313; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
8314; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
8315; GFX7-NEXT:    s_mov_b64 s[12:13], 16
8316; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8317; GFX7-NEXT:    s_mov_b32 s6, s4
8318; GFX7-NEXT:    s_mov_b32 s7, s5
8319; GFX7-NEXT:    s_mov_b32 s11, s12
8320; GFX7-NEXT:    s_mov_b32 s10, s13
8321; GFX7-NEXT:    s_add_u32 s6, s6, s11
8322; GFX7-NEXT:    s_addc_u32 s10, s7, s10
8323; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
8324; GFX7-NEXT:    s_mov_b32 s7, s10
8325; GFX7-NEXT:    v_mov_b32_e32 v2, s9
8326; GFX7-NEXT:    v_mov_b32_e32 v0, s8
8327; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8328; GFX7-NEXT:    v_mov_b32_e32 v3, v0
8329; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8330; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8331; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8332; GFX7-NEXT:    s_waitcnt vmcnt(0)
8333; GFX7-NEXT:    buffer_wbinvl1_vol
8334; GFX7-NEXT:    v_mov_b32_e32 v0, s4
8335; GFX7-NEXT:    v_mov_b32_e32 v1, s5
8336; GFX7-NEXT:    flat_store_dword v[0:1], v2
8337; GFX7-NEXT:    s_endpgm
8338;
8339; GFX10-WGP-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8340; GFX10-WGP:       ; %bb.0: ; %entry
8341; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
8342; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8343; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
8344; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
8345; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8346; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
8347; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
8348; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8349; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
8350; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8351; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
8352; GFX10-WGP-NEXT:    buffer_gl1_inv
8353; GFX10-WGP-NEXT:    buffer_gl0_inv
8354; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
8355; GFX10-WGP-NEXT:    s_endpgm
8356;
8357; GFX10-CU-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8358; GFX10-CU:       ; %bb.0: ; %entry
8359; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
8360; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8361; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
8362; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
8363; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
8364; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
8365; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
8366; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8367; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
8368; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8369; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
8370; GFX10-CU-NEXT:    buffer_gl1_inv
8371; GFX10-CU-NEXT:    buffer_gl0_inv
8372; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
8373; GFX10-CU-NEXT:    s_endpgm
8374;
8375; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8376; SKIP-CACHE-INV:       ; %bb.0: ; %entry
8377; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
8378; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
8379; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
8380; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
8381; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
8382; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
8383; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
8384; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
8385; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
8386; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
8387; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
8388; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
8389; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
8390; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
8391; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
8392; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8393; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
8394; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8395; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8396; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8397; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8398; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
8399; SKIP-CACHE-INV-NEXT:    s_endpgm
8400;
8401; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8402; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
8403; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8404; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8405; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8406; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8407; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8408; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8409; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8410; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8411; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8412; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8413; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8414; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
8415; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8416; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
8417;
8418; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8419; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
8420; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8421; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8422; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8423; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8424; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8425; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8426; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8427; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8428; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8429; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8430; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8431; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
8432; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8433; GFX90A-TGSPLIT-NEXT:    s_endpgm
8434;
8435; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8436; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
8437; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8438; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8439; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8440; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8441; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8442; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8443; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8444; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8445; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8446; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8447; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8448; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
8449; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8450; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
8451;
8452; GFX940-TGSPLIT-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8453; GFX940-TGSPLIT:       ; %bb.0: ; %entry
8454; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8455; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8456; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8457; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8458; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8459; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8460; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8461; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8462; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8463; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8464; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8465; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
8466; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8467; GFX940-TGSPLIT-NEXT:    s_endpgm
8468;
8469; GFX11-WGP-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8470; GFX11-WGP:       ; %bb.0: ; %entry
8471; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
8472; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8473; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8474; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8475; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8476; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
8477; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
8478; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8479; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
8480; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8481; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
8482; GFX11-WGP-NEXT:    buffer_gl1_inv
8483; GFX11-WGP-NEXT:    buffer_gl0_inv
8484; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8485; GFX11-WGP-NEXT:    s_endpgm
8486;
8487; GFX11-CU-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8488; GFX11-CU:       ; %bb.0: ; %entry
8489; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
8490; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8491; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8492; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8493; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
8494; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
8495; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
8496; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8497; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
8498; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8499; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
8500; GFX11-CU-NEXT:    buffer_gl1_inv
8501; GFX11-CU-NEXT:    buffer_gl0_inv
8502; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8503; GFX11-CU-NEXT:    s_endpgm
8504;
8505; GFX12-WGP-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8506; GFX12-WGP:       ; %bb.0: ; %entry
8507; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
8508; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8509; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8510; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8511; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
8512; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
8513; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
8514; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8515; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
8516; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
8517; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8518; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8519; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
8520; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
8521; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8522; GFX12-WGP-NEXT:    s_endpgm
8523;
8524; GFX12-CU-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
8525; GFX12-CU:       ; %bb.0: ; %entry
8526; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
8527; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8528; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8529; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8530; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
8531; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
8532; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
8533; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8534; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
8535; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
8536; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8537; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8538; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
8539; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
8540; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8541; GFX12-CU-NEXT:    s_endpgm
8542    ptr addrspace(1) %out, i32 %in, i32 %old) {
8543entry:
8544  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
8545  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire
8546  %val0 = extractvalue { i32, i1 } %val, 0
8547  store i32 %val0, ptr addrspace(1) %out, align 4
8548  ret void
8549}
8550
; Returning cmpxchg on a global (addrspace 1) pointer at syncscope "agent",
; with acquire ordering for both the success and the failure case. The value
; loaded by the cmpxchg is stored back to %out.
;
; In the expected output below, each configuration issues a returning atomic
; cmpswap followed by a wait and a cache-invalidate instruction
; (buffer_wbinvl1 / buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv,
; buffer_inv sc1, or global_inv scope:SCOPE_DEV). The exception is the
; SKIP-CACHE-INV run, where no invalidate instruction is expected.
8551define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
8552; GFX6-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8553; GFX6:       ; %bb.0: ; %entry
8554; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
8555; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8556; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
8557; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
8558; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
8559; GFX6-NEXT:    s_mov_b32 s12, s5
8560; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
8561; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
8562; GFX6-NEXT:    s_mov_b32 s11, -1
8563; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
8564; GFX6-NEXT:    s_mov_b32 s5, s12
8565; GFX6-NEXT:    s_mov_b32 s6, s11
8566; GFX6-NEXT:    s_mov_b32 s7, s10
8567; GFX6-NEXT:    v_mov_b32_e32 v0, s9
8568; GFX6-NEXT:    v_mov_b32_e32 v2, s8
8569; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8570; GFX6-NEXT:    v_mov_b32_e32 v1, v2
8571; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8572; GFX6-NEXT:    s_waitcnt vmcnt(0)
8573; GFX6-NEXT:    buffer_wbinvl1
8574; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8575; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
8576; GFX6-NEXT:    s_endpgm
8577;
8578; GFX7-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8579; GFX7:       ; %bb.0: ; %entry
8580; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
8581; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8582; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
8583; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
8584; GFX7-NEXT:    s_mov_b64 s[12:13], 16
8585; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8586; GFX7-NEXT:    s_mov_b32 s6, s4
8587; GFX7-NEXT:    s_mov_b32 s7, s5
8588; GFX7-NEXT:    s_mov_b32 s11, s12
8589; GFX7-NEXT:    s_mov_b32 s10, s13
8590; GFX7-NEXT:    s_add_u32 s6, s6, s11
8591; GFX7-NEXT:    s_addc_u32 s10, s7, s10
8592; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
8593; GFX7-NEXT:    s_mov_b32 s7, s10
8594; GFX7-NEXT:    v_mov_b32_e32 v2, s9
8595; GFX7-NEXT:    v_mov_b32_e32 v0, s8
8596; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8597; GFX7-NEXT:    v_mov_b32_e32 v3, v0
8598; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8599; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8600; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8601; GFX7-NEXT:    s_waitcnt vmcnt(0)
8602; GFX7-NEXT:    buffer_wbinvl1_vol
8603; GFX7-NEXT:    v_mov_b32_e32 v0, s4
8604; GFX7-NEXT:    v_mov_b32_e32 v1, s5
8605; GFX7-NEXT:    flat_store_dword v[0:1], v2
8606; GFX7-NEXT:    s_endpgm
8607;
8608; GFX10-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8609; GFX10-WGP:       ; %bb.0: ; %entry
8610; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
8611; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8612; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
8613; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
8614; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8615; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
8616; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
8617; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8618; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
8619; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8620; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
8621; GFX10-WGP-NEXT:    buffer_gl1_inv
8622; GFX10-WGP-NEXT:    buffer_gl0_inv
8623; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
8624; GFX10-WGP-NEXT:    s_endpgm
8625;
8626; GFX10-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8627; GFX10-CU:       ; %bb.0: ; %entry
8628; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
8629; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8630; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
8631; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
8632; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
8633; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
8634; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
8635; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8636; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
8637; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8638; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
8639; GFX10-CU-NEXT:    buffer_gl1_inv
8640; GFX10-CU-NEXT:    buffer_gl0_inv
8641; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
8642; GFX10-CU-NEXT:    s_endpgm
8643;
8644; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8645; SKIP-CACHE-INV:       ; %bb.0: ; %entry
8646; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
8647; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
8648; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
8649; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
8650; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
8651; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
8652; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
8653; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
8654; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
8655; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
8656; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
8657; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
8658; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
8659; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
8660; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
8661; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8662; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
8663; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8664; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8665; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8666; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8667; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
8668; SKIP-CACHE-INV-NEXT:    s_endpgm
8669;
8670; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8671; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
8672; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8673; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8674; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8675; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8676; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8677; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8678; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8679; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8680; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8681; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8682; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8683; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
8684; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8685; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
8686;
8687; GFX90A-TGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8688; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
8689; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8690; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8691; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8692; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8693; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8694; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8695; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8696; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8697; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8698; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8699; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8700; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
8701; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8702; GFX90A-TGSPLIT-NEXT:    s_endpgm
8703;
8704; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8705; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
8706; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8707; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8708; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8709; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8710; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8711; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8712; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8713; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8714; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8715; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8716; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8717; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
8718; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8719; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
8720;
8721; GFX940-TGSPLIT-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8722; GFX940-TGSPLIT:       ; %bb.0: ; %entry
8723; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8724; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8725; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8726; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8727; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8728; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8729; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8730; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8731; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8732; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8733; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8734; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
8735; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8736; GFX940-TGSPLIT-NEXT:    s_endpgm
8737;
8738; GFX11-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8739; GFX11-WGP:       ; %bb.0: ; %entry
8740; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
8741; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8742; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8743; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8744; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8745; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
8746; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
8747; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8748; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
8749; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8750; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
8751; GFX11-WGP-NEXT:    buffer_gl1_inv
8752; GFX11-WGP-NEXT:    buffer_gl0_inv
8753; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8754; GFX11-WGP-NEXT:    s_endpgm
8755;
8756; GFX11-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8757; GFX11-CU:       ; %bb.0: ; %entry
8758; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
8759; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8760; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8761; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8762; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
8763; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
8764; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
8765; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8766; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
8767; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8768; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
8769; GFX11-CU-NEXT:    buffer_gl1_inv
8770; GFX11-CU-NEXT:    buffer_gl0_inv
8771; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8772; GFX11-CU-NEXT:    s_endpgm
8773;
8774; GFX12-WGP-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8775; GFX12-WGP:       ; %bb.0: ; %entry
8776; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
8777; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8778; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8779; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8780; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
8781; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
8782; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
8783; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8784; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
8785; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
8786; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
8787; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
8788; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8789; GFX12-WGP-NEXT:    s_endpgm
8790;
8791; GFX12-CU-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
8792; GFX12-CU:       ; %bb.0: ; %entry
8793; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
8794; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8795; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8796; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8797; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
8798; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
8799; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
8800; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8801; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
8802; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
8803; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
8804; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
8805; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8806; GFX12-CU-NEXT:    s_endpgm
8807    ptr addrspace(1) %out, i32 %in, i32 %old) {
8808entry:
  ; The atomic targets i32 element 4 of %out; the expected output above shows
  ; this as a 16-byte offset on the cmpswap.
8809  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and swap in %in; success and failure orderings are
  ; both acquire, at agent scope.
8810  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire acquire
  ; Field 0 of the cmpxchg result is the loaded value; the i1 success flag in
  ; field 1 is not used.
8811  %val0 = extractvalue { i32, i1 } %val, 0
  ; Using the loaded value in a store is what makes this the "ret" variant.
8812  store i32 %val0, ptr addrspace(1) %out, align 4
8813  ret void
8814}
8815
8816define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
8817; GFX6-LABEL: global_agent_release_acquire_ret_cmpxchg:
8818; GFX6:       ; %bb.0: ; %entry
8819; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
8820; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8821; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
8822; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
8823; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
8824; GFX6-NEXT:    s_mov_b32 s12, s5
8825; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
8826; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
8827; GFX6-NEXT:    s_mov_b32 s11, -1
8828; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
8829; GFX6-NEXT:    s_mov_b32 s5, s12
8830; GFX6-NEXT:    s_mov_b32 s6, s11
8831; GFX6-NEXT:    s_mov_b32 s7, s10
8832; GFX6-NEXT:    v_mov_b32_e32 v0, s9
8833; GFX6-NEXT:    v_mov_b32_e32 v2, s8
8834; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8835; GFX6-NEXT:    v_mov_b32_e32 v1, v2
8836; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8837; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8838; GFX6-NEXT:    s_waitcnt vmcnt(0)
8839; GFX6-NEXT:    buffer_wbinvl1
8840; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8841; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
8842; GFX6-NEXT:    s_endpgm
8843;
8844; GFX7-LABEL: global_agent_release_acquire_ret_cmpxchg:
8845; GFX7:       ; %bb.0: ; %entry
8846; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
8847; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8848; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
8849; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
8850; GFX7-NEXT:    s_mov_b64 s[12:13], 16
8851; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8852; GFX7-NEXT:    s_mov_b32 s6, s4
8853; GFX7-NEXT:    s_mov_b32 s7, s5
8854; GFX7-NEXT:    s_mov_b32 s11, s12
8855; GFX7-NEXT:    s_mov_b32 s10, s13
8856; GFX7-NEXT:    s_add_u32 s6, s6, s11
8857; GFX7-NEXT:    s_addc_u32 s10, s7, s10
8858; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
8859; GFX7-NEXT:    s_mov_b32 s7, s10
8860; GFX7-NEXT:    v_mov_b32_e32 v2, s9
8861; GFX7-NEXT:    v_mov_b32_e32 v0, s8
8862; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8863; GFX7-NEXT:    v_mov_b32_e32 v3, v0
8864; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8865; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8866; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8867; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8868; GFX7-NEXT:    s_waitcnt vmcnt(0)
8869; GFX7-NEXT:    buffer_wbinvl1_vol
8870; GFX7-NEXT:    v_mov_b32_e32 v0, s4
8871; GFX7-NEXT:    v_mov_b32_e32 v1, s5
8872; GFX7-NEXT:    flat_store_dword v[0:1], v2
8873; GFX7-NEXT:    s_endpgm
8874;
8875; GFX10-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg:
8876; GFX10-WGP:       ; %bb.0: ; %entry
8877; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
8878; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8879; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
8880; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
8881; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8882; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
8883; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
8884; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8885; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
8886; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8887; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8888; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8889; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
8890; GFX10-WGP-NEXT:    buffer_gl1_inv
8891; GFX10-WGP-NEXT:    buffer_gl0_inv
8892; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
8893; GFX10-WGP-NEXT:    s_endpgm
8894;
8895; GFX10-CU-LABEL: global_agent_release_acquire_ret_cmpxchg:
8896; GFX10-CU:       ; %bb.0: ; %entry
8897; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
8898; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8899; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
8900; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
8901; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
8902; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
8903; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
8904; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8905; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
8906; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8907; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8908; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8909; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
8910; GFX10-CU-NEXT:    buffer_gl1_inv
8911; GFX10-CU-NEXT:    buffer_gl0_inv
8912; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
8913; GFX10-CU-NEXT:    s_endpgm
8914;
8915; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_ret_cmpxchg:
8916; SKIP-CACHE-INV:       ; %bb.0: ; %entry
8917; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
8918; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
8919; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
8920; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
8921; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
8922; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
8923; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
8924; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
8925; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
8926; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
8927; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
8928; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
8929; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
8930; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
8931; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
8932; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8933; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
8934; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8935; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8936; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8937; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8938; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8939; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
8940; SKIP-CACHE-INV-NEXT:    s_endpgm
8941;
8942; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg:
8943; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
8944; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8945; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8946; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8947; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8948; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8949; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8950; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8951; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8952; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8953; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8954; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8955; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8956; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
8957; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8958; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
8959;
8960; GFX90A-TGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg:
8961; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
8962; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8963; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8964; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8965; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8966; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8967; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8968; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8969; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8970; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8971; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8972; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8973; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8974; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
8975; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8976; GFX90A-TGSPLIT-NEXT:    s_endpgm
8977;
8978; GFX940-NOTTGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg:
8979; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
8980; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8981; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8982; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8983; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8984; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8985; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8986; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8987; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8988; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8989; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
8990; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8991; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
8992; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8993; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
8994; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8995; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
8996;
8997; GFX940-TGSPLIT-LABEL: global_agent_release_acquire_ret_cmpxchg:
8998; GFX940-TGSPLIT:       ; %bb.0: ; %entry
8999; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9000; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9001; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9002; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9003; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9004; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9005; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9006; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9007; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9008; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
9009; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9010; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9011; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9012; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
9013; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9014; GFX940-TGSPLIT-NEXT:    s_endpgm
9015;
9016; GFX11-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg:
9017; GFX11-WGP:       ; %bb.0: ; %entry
9018; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
9019; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9020; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9021; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9022; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9023; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
9024; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
9025; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9026; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
9027; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9028; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9029; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9030; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
9031; GFX11-WGP-NEXT:    buffer_gl1_inv
9032; GFX11-WGP-NEXT:    buffer_gl0_inv
9033; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9034; GFX11-WGP-NEXT:    s_endpgm
9035;
9036; GFX11-CU-LABEL: global_agent_release_acquire_ret_cmpxchg:
9037; GFX11-CU:       ; %bb.0: ; %entry
9038; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
9039; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9040; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9041; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9042; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
9043; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
9044; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
9045; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9046; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
9047; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9048; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9049; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9050; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
9051; GFX11-CU-NEXT:    buffer_gl1_inv
9052; GFX11-CU-NEXT:    buffer_gl0_inv
9053; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9054; GFX11-CU-NEXT:    s_endpgm
9055;
9056; GFX12-WGP-LABEL: global_agent_release_acquire_ret_cmpxchg:
9057; GFX12-WGP:       ; %bb.0: ; %entry
9058; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
9059; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9060; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9061; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9062; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
9063; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
9064; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
9065; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9066; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
9067; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9068; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9069; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
9070; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
9071; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9072; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9073; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9074; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
9075; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
9076; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9077; GFX12-WGP-NEXT:    s_endpgm
9078;
9079; GFX12-CU-LABEL: global_agent_release_acquire_ret_cmpxchg:
9080; GFX12-CU:       ; %bb.0: ; %entry
9081; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
9082; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9083; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9084; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9085; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
9086; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
9087; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
9088; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9089; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
9090; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9091; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9092; GFX12-CU-NEXT:    s_wait_storecnt 0x0
9093; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
9094; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9095; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9096; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9097; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
9098; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
9099; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9100; GFX12-CU-NEXT:    s_endpgm
9101    ptr addrspace(1) %out, i32 %in, i32 %old) {
9102entry:
9103  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
9104  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release acquire
9105  %val0 = extractvalue { i32, i1 } %val, 0
9106  store i32 %val0, ptr addrspace(1) %out, align 4
9107  ret void
9108}
9109
; Kernel under test: a volatile, agent-scope compare-and-swap on a global
; (addrspace(1)) pointer with acq_rel success ordering and acquire failure
; ordering, whose loaded value is then stored back through %out.
;
; The assertion comments inside this definition are autogenerated (see the
; note on the first line of this file, utils/update_llc_test_checks.py), one
; group per RUN configuration. Regenerate them with that script rather than
; editing them by hand.
9110define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
9111; GFX6-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9112; GFX6:       ; %bb.0: ; %entry
9113; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
9114; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9115; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
9116; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
9117; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
9118; GFX6-NEXT:    s_mov_b32 s12, s5
9119; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
9120; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
9121; GFX6-NEXT:    s_mov_b32 s11, -1
9122; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
9123; GFX6-NEXT:    s_mov_b32 s5, s12
9124; GFX6-NEXT:    s_mov_b32 s6, s11
9125; GFX6-NEXT:    s_mov_b32 s7, s10
9126; GFX6-NEXT:    v_mov_b32_e32 v0, s9
9127; GFX6-NEXT:    v_mov_b32_e32 v2, s8
9128; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9129; GFX6-NEXT:    v_mov_b32_e32 v1, v2
9130; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9131; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
9132; GFX6-NEXT:    s_waitcnt vmcnt(0)
9133; GFX6-NEXT:    buffer_wbinvl1
9134; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9135; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9136; GFX6-NEXT:    s_endpgm
9137;
9138; GFX7-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9139; GFX7:       ; %bb.0: ; %entry
9140; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
9141; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9142; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
9143; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
9144; GFX7-NEXT:    s_mov_b64 s[12:13], 16
9145; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
9146; GFX7-NEXT:    s_mov_b32 s6, s4
9147; GFX7-NEXT:    s_mov_b32 s7, s5
9148; GFX7-NEXT:    s_mov_b32 s11, s12
9149; GFX7-NEXT:    s_mov_b32 s10, s13
9150; GFX7-NEXT:    s_add_u32 s6, s6, s11
9151; GFX7-NEXT:    s_addc_u32 s10, s7, s10
9152; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
9153; GFX7-NEXT:    s_mov_b32 s7, s10
9154; GFX7-NEXT:    v_mov_b32_e32 v2, s9
9155; GFX7-NEXT:    v_mov_b32_e32 v0, s8
9156; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9157; GFX7-NEXT:    v_mov_b32_e32 v3, v0
9158; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9159; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9160; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9161; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
9162; GFX7-NEXT:    s_waitcnt vmcnt(0)
9163; GFX7-NEXT:    buffer_wbinvl1_vol
9164; GFX7-NEXT:    v_mov_b32_e32 v0, s4
9165; GFX7-NEXT:    v_mov_b32_e32 v1, s5
9166; GFX7-NEXT:    flat_store_dword v[0:1], v2
9167; GFX7-NEXT:    s_endpgm
9168;
9169; GFX10-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9170; GFX10-WGP:       ; %bb.0: ; %entry
9171; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
9172; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9173; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
9174; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
9175; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9176; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
9177; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
9178; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9179; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
9180; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9181; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9182; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9183; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
9184; GFX10-WGP-NEXT:    buffer_gl1_inv
9185; GFX10-WGP-NEXT:    buffer_gl0_inv
9186; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
9187; GFX10-WGP-NEXT:    s_endpgm
9188;
9189; GFX10-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9190; GFX10-CU:       ; %bb.0: ; %entry
9191; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
9192; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9193; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
9194; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
9195; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
9196; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
9197; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
9198; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9199; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
9200; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9201; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9202; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9203; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
9204; GFX10-CU-NEXT:    buffer_gl1_inv
9205; GFX10-CU-NEXT:    buffer_gl0_inv
9206; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
9207; GFX10-CU-NEXT:    s_endpgm
9208;
9209; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9210; SKIP-CACHE-INV:       ; %bb.0: ; %entry
9211; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
9212; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
9213; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
9214; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
9215; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
9216; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
9217; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
9218; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
9219; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
9220; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
9221; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
9222; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
9223; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
9224; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
9225; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
9226; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9227; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
9228; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9229; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
9230; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9231; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9232; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9233; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
9234; SKIP-CACHE-INV-NEXT:    s_endpgm
9235;
9236; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9237; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
9238; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9239; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9240; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9241; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9242; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9243; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9244; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9245; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9246; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9247; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9248; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9249; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9250; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
9251; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9252; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
9253;
9254; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9255; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
9256; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9257; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9258; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9259; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9260; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9261; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9262; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9263; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9264; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9265; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9266; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9267; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9268; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
9269; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9270; GFX90A-TGSPLIT-NEXT:    s_endpgm
9271;
9272; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9273; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
9274; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9275; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9276; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9277; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9278; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9279; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9280; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9281; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9282; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9283; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
9284; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9285; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9286; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9287; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
9288; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9289; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
9290;
9291; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9292; GFX940-TGSPLIT:       ; %bb.0: ; %entry
9293; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9294; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9295; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9296; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9297; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9298; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9299; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9300; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9301; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9302; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
9303; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9304; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9305; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9306; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
9307; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9308; GFX940-TGSPLIT-NEXT:    s_endpgm
9309;
9310; GFX11-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9311; GFX11-WGP:       ; %bb.0: ; %entry
9312; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
9313; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9314; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9315; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9316; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9317; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
9318; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
9319; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9320; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
9321; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9322; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9323; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9324; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
9325; GFX11-WGP-NEXT:    buffer_gl1_inv
9326; GFX11-WGP-NEXT:    buffer_gl0_inv
9327; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9328; GFX11-WGP-NEXT:    s_endpgm
9329;
9330; GFX11-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9331; GFX11-CU:       ; %bb.0: ; %entry
9332; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
9333; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9334; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9335; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9336; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
9337; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
9338; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
9339; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9340; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
9341; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9342; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9343; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9344; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
9345; GFX11-CU-NEXT:    buffer_gl1_inv
9346; GFX11-CU-NEXT:    buffer_gl0_inv
9347; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9348; GFX11-CU-NEXT:    s_endpgm
9349;
9350; GFX12-WGP-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9351; GFX12-WGP:       ; %bb.0: ; %entry
9352; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
9353; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9354; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9355; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9356; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
9357; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
9358; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
9359; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9360; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
9361; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9362; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9363; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
9364; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
9365; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9366; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9367; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9368; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
9369; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
9370; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9371; GFX12-WGP-NEXT:    s_endpgm
9372;
9373; GFX12-CU-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
9374; GFX12-CU:       ; %bb.0: ; %entry
9375; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
9376; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9377; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9378; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9379; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
9380; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
9381; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
9382; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9383; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
9384; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9385; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9386; GFX12-CU-NEXT:    s_wait_storecnt 0x0
9387; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
9388; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9389; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9390; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9391; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
9392; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
9393; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9394; GFX12-CU-NEXT:    s_endpgm
9395    ptr addrspace(1) %out, i32 %in, i32 %old) {
9396entry:
  ; %gep addresses i32 element 4 of %out, i.e. byte offset 16; the checked
  ; atomic instructions carry this as offset:16 or as an explicit add of 16.
9397  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Volatile cmpxchg at syncscope("agent"): expected value %old, replacement
  ; %in, acq_rel ordering on success and acquire ordering on failure.
9398  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire
  ; Element 0 of the result pair is the value loaded from memory; the i1
  ; success flag (element 1) is not used by this test.
9399  %val0 = extractvalue { i32, i1 } %val, 0
  ; Store the loaded value to the start of %out so the atomic's result is used.
9400  store i32 %val0, ptr addrspace(1) %out, align 4
9401  ret void
9402}
9403
9404define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
9405; GFX6-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9406; GFX6:       ; %bb.0: ; %entry
9407; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
9408; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9409; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
9410; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
9411; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
9412; GFX6-NEXT:    s_mov_b32 s12, s5
9413; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
9414; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
9415; GFX6-NEXT:    s_mov_b32 s11, -1
9416; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
9417; GFX6-NEXT:    s_mov_b32 s5, s12
9418; GFX6-NEXT:    s_mov_b32 s6, s11
9419; GFX6-NEXT:    s_mov_b32 s7, s10
9420; GFX6-NEXT:    v_mov_b32_e32 v0, s9
9421; GFX6-NEXT:    v_mov_b32_e32 v2, s8
9422; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9423; GFX6-NEXT:    v_mov_b32_e32 v1, v2
9424; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9425; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
9426; GFX6-NEXT:    s_waitcnt vmcnt(0)
9427; GFX6-NEXT:    buffer_wbinvl1
9428; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9429; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9430; GFX6-NEXT:    s_endpgm
9431;
9432; GFX7-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9433; GFX7:       ; %bb.0: ; %entry
9434; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
9435; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9436; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
9437; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
9438; GFX7-NEXT:    s_mov_b64 s[12:13], 16
9439; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
9440; GFX7-NEXT:    s_mov_b32 s6, s4
9441; GFX7-NEXT:    s_mov_b32 s7, s5
9442; GFX7-NEXT:    s_mov_b32 s11, s12
9443; GFX7-NEXT:    s_mov_b32 s10, s13
9444; GFX7-NEXT:    s_add_u32 s6, s6, s11
9445; GFX7-NEXT:    s_addc_u32 s10, s7, s10
9446; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
9447; GFX7-NEXT:    s_mov_b32 s7, s10
9448; GFX7-NEXT:    v_mov_b32_e32 v2, s9
9449; GFX7-NEXT:    v_mov_b32_e32 v0, s8
9450; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9451; GFX7-NEXT:    v_mov_b32_e32 v3, v0
9452; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9453; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9454; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9455; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
9456; GFX7-NEXT:    s_waitcnt vmcnt(0)
9457; GFX7-NEXT:    buffer_wbinvl1_vol
9458; GFX7-NEXT:    v_mov_b32_e32 v0, s4
9459; GFX7-NEXT:    v_mov_b32_e32 v1, s5
9460; GFX7-NEXT:    flat_store_dword v[0:1], v2
9461; GFX7-NEXT:    s_endpgm
9462;
9463; GFX10-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9464; GFX10-WGP:       ; %bb.0: ; %entry
9465; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
9466; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9467; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
9468; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
9469; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9470; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
9471; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
9472; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9473; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
9474; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9475; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9476; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9477; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
9478; GFX10-WGP-NEXT:    buffer_gl1_inv
9479; GFX10-WGP-NEXT:    buffer_gl0_inv
9480; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
9481; GFX10-WGP-NEXT:    s_endpgm
9482;
9483; GFX10-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9484; GFX10-CU:       ; %bb.0: ; %entry
9485; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
9486; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9487; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
9488; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
9489; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
9490; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
9491; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
9492; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9493; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
9494; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9495; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9496; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9497; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
9498; GFX10-CU-NEXT:    buffer_gl1_inv
9499; GFX10-CU-NEXT:    buffer_gl0_inv
9500; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
9501; GFX10-CU-NEXT:    s_endpgm
9502;
9503; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9504; SKIP-CACHE-INV:       ; %bb.0: ; %entry
9505; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
9506; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
9507; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
9508; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
9509; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
9510; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
9511; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
9512; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
9513; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
9514; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
9515; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
9516; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
9517; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
9518; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
9519; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
9520; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9521; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
9522; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9523; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
9524; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9525; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9526; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9527; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
9528; SKIP-CACHE-INV-NEXT:    s_endpgm
9529;
9530; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9531; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
9532; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9533; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9534; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9535; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9536; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9537; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9538; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9539; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9540; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9541; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9542; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9543; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9544; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
9545; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9546; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
9547;
9548; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9549; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
9550; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9551; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9552; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9553; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9554; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9555; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9556; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9557; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9558; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9559; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9560; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9561; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9562; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
9563; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9564; GFX90A-TGSPLIT-NEXT:    s_endpgm
9565;
9566; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9567; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
9568; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9569; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9570; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9571; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9572; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9573; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9574; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9575; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9576; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9577; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
9578; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9579; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9580; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9581; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
9582; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9583; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
9584;
9585; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9586; GFX940-TGSPLIT:       ; %bb.0: ; %entry
9587; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9588; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9589; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9590; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9591; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9592; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9593; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9594; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9595; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9596; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
9597; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9598; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9599; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9600; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
9601; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9602; GFX940-TGSPLIT-NEXT:    s_endpgm
9603;
9604; GFX11-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9605; GFX11-WGP:       ; %bb.0: ; %entry
9606; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
9607; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9608; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9609; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9610; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9611; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
9612; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
9613; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9614; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
9615; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9616; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9617; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9618; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
9619; GFX11-WGP-NEXT:    buffer_gl1_inv
9620; GFX11-WGP-NEXT:    buffer_gl0_inv
9621; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9622; GFX11-WGP-NEXT:    s_endpgm
9623;
9624; GFX11-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9625; GFX11-CU:       ; %bb.0: ; %entry
9626; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
9627; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9628; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9629; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9630; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
9631; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
9632; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
9633; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9634; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
9635; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9636; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9637; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9638; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
9639; GFX11-CU-NEXT:    buffer_gl1_inv
9640; GFX11-CU-NEXT:    buffer_gl0_inv
9641; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9642; GFX11-CU-NEXT:    s_endpgm
9643;
9644; GFX12-WGP-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9645; GFX12-WGP:       ; %bb.0: ; %entry
9646; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
9647; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9648; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9649; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9650; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
9651; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
9652; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
9653; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9654; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
9655; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9656; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9657; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
9658; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
9659; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9660; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9661; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9662; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
9663; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
9664; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9665; GFX12-WGP-NEXT:    s_endpgm
9666;
9667; GFX12-CU-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
9668; GFX12-CU:       ; %bb.0: ; %entry
9669; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
9670; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9671; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9672; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9673; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
9674; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
9675; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
9676; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9677; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
9678; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9679; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9680; GFX12-CU-NEXT:    s_wait_storecnt 0x0
9681; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
9682; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9683; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9684; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9685; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
9686; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
9687; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9688; GFX12-CU-NEXT:    s_endpgm
9689    ptr addrspace(1) %out, i32 %in, i32 %old) {
9690entry:
9691  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
9692  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire
9693  %val0 = extractvalue { i32, i1 } %val, 0
9694  store i32 %val0, ptr addrspace(1) %out, align 4
9695  ret void
9696}
9697
; Value-returning cmpxchg on a global (addrspace(1)) pointer at the "agent"
; sync scope, with success ordering monotonic and failure ordering seq_cst.
; The exchanged location is i32 element 4 of %out (shown as a 16-byte offset
; in the expected output) and the value loaded by the cmpxchg is stored back
; to %out.
; The per-target expectation lines inside this function are autogenerated
; (see the header of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
9698define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg(
9699; GFX6-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9700; GFX6:       ; %bb.0: ; %entry
9701; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
9702; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9703; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
9704; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
9705; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
9706; GFX6-NEXT:    s_mov_b32 s12, s5
9707; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
9708; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
9709; GFX6-NEXT:    s_mov_b32 s11, -1
9710; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
9711; GFX6-NEXT:    s_mov_b32 s5, s12
9712; GFX6-NEXT:    s_mov_b32 s6, s11
9713; GFX6-NEXT:    s_mov_b32 s7, s10
9714; GFX6-NEXT:    v_mov_b32_e32 v0, s9
9715; GFX6-NEXT:    v_mov_b32_e32 v2, s8
9716; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9717; GFX6-NEXT:    v_mov_b32_e32 v1, v2
9718; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9719; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
9720; GFX6-NEXT:    s_waitcnt vmcnt(0)
9721; GFX6-NEXT:    buffer_wbinvl1
9722; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9723; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9724; GFX6-NEXT:    s_endpgm
9725;
9726; GFX7-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9727; GFX7:       ; %bb.0: ; %entry
9728; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
9729; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9730; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
9731; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
9732; GFX7-NEXT:    s_mov_b64 s[12:13], 16
9733; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
9734; GFX7-NEXT:    s_mov_b32 s6, s4
9735; GFX7-NEXT:    s_mov_b32 s7, s5
9736; GFX7-NEXT:    s_mov_b32 s11, s12
9737; GFX7-NEXT:    s_mov_b32 s10, s13
9738; GFX7-NEXT:    s_add_u32 s6, s6, s11
9739; GFX7-NEXT:    s_addc_u32 s10, s7, s10
9740; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
9741; GFX7-NEXT:    s_mov_b32 s7, s10
9742; GFX7-NEXT:    v_mov_b32_e32 v2, s9
9743; GFX7-NEXT:    v_mov_b32_e32 v0, s8
9744; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9745; GFX7-NEXT:    v_mov_b32_e32 v3, v0
9746; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9747; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9748; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9749; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
9750; GFX7-NEXT:    s_waitcnt vmcnt(0)
9751; GFX7-NEXT:    buffer_wbinvl1_vol
9752; GFX7-NEXT:    v_mov_b32_e32 v0, s4
9753; GFX7-NEXT:    v_mov_b32_e32 v1, s5
9754; GFX7-NEXT:    flat_store_dword v[0:1], v2
9755; GFX7-NEXT:    s_endpgm
9756;
9757; GFX10-WGP-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9758; GFX10-WGP:       ; %bb.0: ; %entry
9759; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
9760; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9761; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
9762; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
9763; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9764; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
9765; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
9766; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9767; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
9768; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9769; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9770; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9771; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
9772; GFX10-WGP-NEXT:    buffer_gl1_inv
9773; GFX10-WGP-NEXT:    buffer_gl0_inv
9774; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
9775; GFX10-WGP-NEXT:    s_endpgm
9776;
9777; GFX10-CU-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9778; GFX10-CU:       ; %bb.0: ; %entry
9779; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
9780; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9781; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
9782; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
9783; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
9784; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
9785; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
9786; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9787; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
9788; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9789; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9790; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9791; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
9792; GFX10-CU-NEXT:    buffer_gl1_inv
9793; GFX10-CU-NEXT:    buffer_gl0_inv
9794; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
9795; GFX10-CU-NEXT:    s_endpgm
9796;
9797; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9798; SKIP-CACHE-INV:       ; %bb.0: ; %entry
9799; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
9800; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
9801; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
9802; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
9803; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
9804; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
9805; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
9806; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
9807; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
9808; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
9809; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
9810; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
9811; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
9812; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
9813; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
9814; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9815; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
9816; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9817; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
9818; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9819; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9820; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9821; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
9822; SKIP-CACHE-INV-NEXT:    s_endpgm
9823;
9824; GFX90A-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9825; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
9826; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9827; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9828; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9829; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9830; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9831; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9832; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9833; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9834; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9835; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9836; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9837; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9838; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
9839; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9840; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
9841;
9842; GFX90A-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9843; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
9844; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9845; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9846; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9847; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9848; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9849; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9850; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9851; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9852; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9853; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9854; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9855; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9856; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
9857; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9858; GFX90A-TGSPLIT-NEXT:    s_endpgm
9859;
9860; GFX940-NOTTGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9861; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
9862; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9863; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9864; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9865; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9866; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9867; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9868; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9869; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9870; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9871; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
9872; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9873; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9874; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9875; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
9876; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9877; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
9878;
9879; GFX940-TGSPLIT-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9880; GFX940-TGSPLIT:       ; %bb.0: ; %entry
9881; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9882; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9883; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9884; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9885; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9886; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9887; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9888; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9889; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9890; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
9891; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9892; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
9893; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9894; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
9895; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9896; GFX940-TGSPLIT-NEXT:    s_endpgm
9897;
9898; GFX11-WGP-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9899; GFX11-WGP:       ; %bb.0: ; %entry
9900; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
9901; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9902; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9903; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9904; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9905; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
9906; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
9907; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9908; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
9909; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9910; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9911; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9912; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
9913; GFX11-WGP-NEXT:    buffer_gl1_inv
9914; GFX11-WGP-NEXT:    buffer_gl0_inv
9915; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9916; GFX11-WGP-NEXT:    s_endpgm
9917;
9918; GFX11-CU-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9919; GFX11-CU:       ; %bb.0: ; %entry
9920; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
9921; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9922; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9923; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9924; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
9925; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
9926; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
9927; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9928; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
9929; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9930; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9931; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9932; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
9933; GFX11-CU-NEXT:    buffer_gl1_inv
9934; GFX11-CU-NEXT:    buffer_gl0_inv
9935; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9936; GFX11-CU-NEXT:    s_endpgm
9937;
9938; GFX12-WGP-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9939; GFX12-WGP:       ; %bb.0: ; %entry
9940; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
9941; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9942; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9943; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9944; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
9945; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
9946; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
9947; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9948; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
9949; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9950; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9951; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
9952; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
9953; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9954; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9955; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9956; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
9957; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
9958; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9959; GFX12-WGP-NEXT:    s_endpgm
9960;
9961; GFX12-CU-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
9962; GFX12-CU:       ; %bb.0: ; %entry
9963; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
9964; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9965; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9966; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9967; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
9968; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
9969; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
9970; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9971; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
9972; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9973; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9974; GFX12-CU-NEXT:    s_wait_storecnt 0x0
9975; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
9976; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
9977; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9978; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9979; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
9980; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
9981; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9982; GFX12-CU-NEXT:    s_endpgm
9983    ptr addrspace(1) %out, i32 %in, i32 %old) {
9984entry:
  ; Address of i32 element 4 past %out (the 16-byte offset seen in the expected output).
9985  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory value against %old and, on a match, write %in.
9986  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst
  ; Field 0 of the cmpxchg result is the value that was loaded from memory.
9987  %val0 = extractvalue { i32, i1 } %val, 0
9988  store i32 %val0, ptr addrspace(1) %out, align 4
9989  ret void
9990}
9991
; Value-returning cmpxchg on a global (addrspace(1)) pointer at the "agent"
; sync scope, with success ordering acquire and failure ordering seq_cst.
; The exchanged location is i32 element 4 of %out (shown as a 16-byte offset
; in the expected output) and the value loaded by the cmpxchg is stored back
; to %out.
; The per-target expectation lines inside this function are autogenerated
; (see the header of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
9992define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg(
9993; GFX6-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
9994; GFX6:       ; %bb.0: ; %entry
9995; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
9996; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9997; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
9998; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
9999; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10000; GFX6-NEXT:    s_mov_b32 s12, s5
10001; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10002; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
10003; GFX6-NEXT:    s_mov_b32 s11, -1
10004; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10005; GFX6-NEXT:    s_mov_b32 s5, s12
10006; GFX6-NEXT:    s_mov_b32 s6, s11
10007; GFX6-NEXT:    s_mov_b32 s7, s10
10008; GFX6-NEXT:    v_mov_b32_e32 v0, s9
10009; GFX6-NEXT:    v_mov_b32_e32 v2, s8
10010; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10011; GFX6-NEXT:    v_mov_b32_e32 v1, v2
10012; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10013; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
10014; GFX6-NEXT:    s_waitcnt vmcnt(0)
10015; GFX6-NEXT:    buffer_wbinvl1
10016; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
10017; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10018; GFX6-NEXT:    s_endpgm
10019;
10020; GFX7-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10021; GFX7:       ; %bb.0: ; %entry
10022; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
10023; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
10024; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
10025; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
10026; GFX7-NEXT:    s_mov_b64 s[12:13], 16
10027; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10028; GFX7-NEXT:    s_mov_b32 s6, s4
10029; GFX7-NEXT:    s_mov_b32 s7, s5
10030; GFX7-NEXT:    s_mov_b32 s11, s12
10031; GFX7-NEXT:    s_mov_b32 s10, s13
10032; GFX7-NEXT:    s_add_u32 s6, s6, s11
10033; GFX7-NEXT:    s_addc_u32 s10, s7, s10
10034; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
10035; GFX7-NEXT:    s_mov_b32 s7, s10
10036; GFX7-NEXT:    v_mov_b32_e32 v2, s9
10037; GFX7-NEXT:    v_mov_b32_e32 v0, s8
10038; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10039; GFX7-NEXT:    v_mov_b32_e32 v3, v0
10040; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10041; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10042; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10043; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
10044; GFX7-NEXT:    s_waitcnt vmcnt(0)
10045; GFX7-NEXT:    buffer_wbinvl1_vol
10046; GFX7-NEXT:    v_mov_b32_e32 v0, s4
10047; GFX7-NEXT:    v_mov_b32_e32 v1, s5
10048; GFX7-NEXT:    flat_store_dword v[0:1], v2
10049; GFX7-NEXT:    s_endpgm
10050;
10051; GFX10-WGP-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10052; GFX10-WGP:       ; %bb.0: ; %entry
10053; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10054; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10055; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
10056; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
10057; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10058; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
10059; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
10060; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10061; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
10062; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10063; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10064; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
10065; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
10066; GFX10-WGP-NEXT:    buffer_gl1_inv
10067; GFX10-WGP-NEXT:    buffer_gl0_inv
10068; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10069; GFX10-WGP-NEXT:    s_endpgm
10070;
10071; GFX10-CU-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10072; GFX10-CU:       ; %bb.0: ; %entry
10073; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10074; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10075; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
10076; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
10077; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10078; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
10079; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
10080; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10081; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
10082; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10083; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10084; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
10085; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
10086; GFX10-CU-NEXT:    buffer_gl1_inv
10087; GFX10-CU-NEXT:    buffer_gl0_inv
10088; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10089; GFX10-CU-NEXT:    s_endpgm
10090;
10091; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10092; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10093; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
10094; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
10095; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
10096; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
10097; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10098; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
10099; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10100; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
10101; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
10102; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10103; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
10104; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
10105; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
10106; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
10107; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
10108; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10109; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
10110; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10111; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
10112; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10113; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
10114; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10115; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10116; SKIP-CACHE-INV-NEXT:    s_endpgm
10117;
10118; GFX90A-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10119; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10120; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10121; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10122; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
10123; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
10124; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10125; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
10126; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10127; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10128; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10129; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10130; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
10131; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10132; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
10133; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10134; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10135;
10136; GFX90A-TGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10137; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10138; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10139; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10140; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
10141; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
10142; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10143; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
10144; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10145; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10146; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10147; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10148; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
10149; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10150; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
10151; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10152; GFX90A-TGSPLIT-NEXT:    s_endpgm
10153;
10154; GFX940-NOTTGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10155; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10156; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10157; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
10158; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
10159; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
10160; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10161; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
10162; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10163; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10164; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10165; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
10166; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10167; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
10168; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10169; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
10170; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10171; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10172;
10173; GFX940-TGSPLIT-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10174; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10175; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10176; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
10177; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
10178; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
10179; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10180; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
10181; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10182; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10183; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10184; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
10185; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10186; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
10187; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10188; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
10189; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10190; GFX940-TGSPLIT-NEXT:    s_endpgm
10191;
10192; GFX11-WGP-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10193; GFX11-WGP:       ; %bb.0: ; %entry
10194; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
10195; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10196; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
10197; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
10198; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10199; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
10200; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
10201; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10202; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
10203; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10204; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10205; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
10206; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
10207; GFX11-WGP-NEXT:    buffer_gl1_inv
10208; GFX11-WGP-NEXT:    buffer_gl0_inv
10209; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10210; GFX11-WGP-NEXT:    s_endpgm
10211;
10212; GFX11-CU-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10213; GFX11-CU:       ; %bb.0: ; %entry
10214; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
10215; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10216; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
10217; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
10218; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
10219; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
10220; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
10221; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10222; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
10223; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10224; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10225; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
10226; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
10227; GFX11-CU-NEXT:    buffer_gl1_inv
10228; GFX11-CU-NEXT:    buffer_gl0_inv
10229; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10230; GFX11-CU-NEXT:    s_endpgm
10231;
10232; GFX12-WGP-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10233; GFX12-WGP:       ; %bb.0: ; %entry
10234; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
10235; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10236; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
10237; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
10238; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
10239; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
10240; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
10241; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10242; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
10243; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
10244; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
10245; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
10246; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
10247; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10248; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
10249; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
10250; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10251; GFX12-WGP-NEXT:    s_endpgm
10252;
10253; GFX12-CU-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
10254; GFX12-CU:       ; %bb.0: ; %entry
10255; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
10256; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10257; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
10258; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
10259; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
10260; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
10261; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
10262; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10263; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
10264; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
10265; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
10266; GFX12-CU-NEXT:    s_wait_storecnt 0x0
10267; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
10268; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10269; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
10270; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
10271; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10272; GFX12-CU-NEXT:    s_endpgm
10273    ptr addrspace(1) %out, i32 %in, i32 %old) {
10274entry:
  ; Address of i32 element 4 past %out (the 16-byte offset seen in the expected output).
10275  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory value against %old and, on a match, write %in.
10276  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst
  ; Field 0 of the cmpxchg result is the value that was loaded from memory.
10277  %val0 = extractvalue { i32, i1 } %val, 0
10278  store i32 %val0, ptr addrspace(1) %out, align 4
10279  ret void
10280}
10281
; Kernel under test: a volatile cmpxchg on an addrspace(1) pointer at
; syncscope("agent") whose success ordering is release and whose failure
; ordering is seq_cst. The value loaded by the cmpxchg is stored back to %out,
; so the result of the atomic is used.
; The per-target assertions that follow are autogenerated (see the note on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
10282define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg(
10283; GFX6-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10284; GFX6:       ; %bb.0: ; %entry
10285; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
10286; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
10287; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
10288; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
10289; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10290; GFX6-NEXT:    s_mov_b32 s12, s5
10291; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10292; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
10293; GFX6-NEXT:    s_mov_b32 s11, -1
10294; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10295; GFX6-NEXT:    s_mov_b32 s5, s12
10296; GFX6-NEXT:    s_mov_b32 s6, s11
10297; GFX6-NEXT:    s_mov_b32 s7, s10
10298; GFX6-NEXT:    v_mov_b32_e32 v0, s9
10299; GFX6-NEXT:    v_mov_b32_e32 v2, s8
10300; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10301; GFX6-NEXT:    v_mov_b32_e32 v1, v2
10302; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10303; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
10304; GFX6-NEXT:    s_waitcnt vmcnt(0)
10305; GFX6-NEXT:    buffer_wbinvl1
10306; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
10307; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10308; GFX6-NEXT:    s_endpgm
10309;
10310; GFX7-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10311; GFX7:       ; %bb.0: ; %entry
10312; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
10313; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
10314; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
10315; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
10316; GFX7-NEXT:    s_mov_b64 s[12:13], 16
10317; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10318; GFX7-NEXT:    s_mov_b32 s6, s4
10319; GFX7-NEXT:    s_mov_b32 s7, s5
10320; GFX7-NEXT:    s_mov_b32 s11, s12
10321; GFX7-NEXT:    s_mov_b32 s10, s13
10322; GFX7-NEXT:    s_add_u32 s6, s6, s11
10323; GFX7-NEXT:    s_addc_u32 s10, s7, s10
10324; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
10325; GFX7-NEXT:    s_mov_b32 s7, s10
10326; GFX7-NEXT:    v_mov_b32_e32 v2, s9
10327; GFX7-NEXT:    v_mov_b32_e32 v0, s8
10328; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10329; GFX7-NEXT:    v_mov_b32_e32 v3, v0
10330; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10331; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10332; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10333; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
10334; GFX7-NEXT:    s_waitcnt vmcnt(0)
10335; GFX7-NEXT:    buffer_wbinvl1_vol
10336; GFX7-NEXT:    v_mov_b32_e32 v0, s4
10337; GFX7-NEXT:    v_mov_b32_e32 v1, s5
10338; GFX7-NEXT:    flat_store_dword v[0:1], v2
10339; GFX7-NEXT:    s_endpgm
10340;
10341; GFX10-WGP-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10342; GFX10-WGP:       ; %bb.0: ; %entry
10343; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10344; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10345; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
10346; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
10347; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10348; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
10349; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
10350; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10351; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
10352; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10353; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10354; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
10355; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
10356; GFX10-WGP-NEXT:    buffer_gl1_inv
10357; GFX10-WGP-NEXT:    buffer_gl0_inv
10358; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10359; GFX10-WGP-NEXT:    s_endpgm
10360;
10361; GFX10-CU-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10362; GFX10-CU:       ; %bb.0: ; %entry
10363; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10364; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10365; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
10366; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
10367; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10368; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
10369; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
10370; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10371; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
10372; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10373; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10374; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
10375; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
10376; GFX10-CU-NEXT:    buffer_gl1_inv
10377; GFX10-CU-NEXT:    buffer_gl0_inv
10378; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10379; GFX10-CU-NEXT:    s_endpgm
10380;
10381; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10382; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10383; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
10384; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
10385; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
10386; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
10387; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10388; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
10389; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10390; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
10391; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
10392; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10393; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
10394; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
10395; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
10396; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
10397; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
10398; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10399; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
10400; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10401; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
10402; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10403; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
10404; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10405; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10406; SKIP-CACHE-INV-NEXT:    s_endpgm
10407;
10408; GFX90A-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10409; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10410; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10411; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10412; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
10413; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
10414; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10415; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
10416; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10417; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10418; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10419; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10420; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
10421; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10422; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
10423; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10424; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10425;
10426; GFX90A-TGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10427; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10428; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10429; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10430; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
10431; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
10432; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10433; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
10434; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10435; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10436; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10437; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10438; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
10439; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10440; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
10441; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10442; GFX90A-TGSPLIT-NEXT:    s_endpgm
10443;
10444; GFX940-NOTTGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10445; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10446; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10447; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
10448; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
10449; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
10450; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10451; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
10452; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10453; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10454; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10455; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
10456; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10457; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
10458; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10459; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
10460; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10461; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10462;
10463; GFX940-TGSPLIT-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10464; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10465; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10466; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
10467; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
10468; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
10469; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10470; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
10471; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10472; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10473; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10474; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
10475; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10476; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
10477; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10478; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
10479; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10480; GFX940-TGSPLIT-NEXT:    s_endpgm
10481;
10482; GFX11-WGP-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10483; GFX11-WGP:       ; %bb.0: ; %entry
10484; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
10485; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10486; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
10487; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
10488; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10489; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
10490; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
10491; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10492; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
10493; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10494; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10495; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
10496; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
10497; GFX11-WGP-NEXT:    buffer_gl1_inv
10498; GFX11-WGP-NEXT:    buffer_gl0_inv
10499; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10500; GFX11-WGP-NEXT:    s_endpgm
10501;
10502; GFX11-CU-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10503; GFX11-CU:       ; %bb.0: ; %entry
10504; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
10505; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10506; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
10507; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
10508; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
10509; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
10510; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
10511; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10512; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
10513; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10514; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10515; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
10516; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
10517; GFX11-CU-NEXT:    buffer_gl1_inv
10518; GFX11-CU-NEXT:    buffer_gl0_inv
10519; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10520; GFX11-CU-NEXT:    s_endpgm
10521;
10522; GFX12-WGP-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10523; GFX12-WGP:       ; %bb.0: ; %entry
10524; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
10525; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10526; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
10527; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
10528; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
10529; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
10530; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
10531; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10532; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
10533; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
10534; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
10535; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
10536; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
10537; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10538; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
10539; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
10540; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
10541; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
10542; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10543; GFX12-WGP-NEXT:    s_endpgm
10544;
10545; GFX12-CU-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
10546; GFX12-CU:       ; %bb.0: ; %entry
10547; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
10548; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10549; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
10550; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
10551; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
10552; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
10553; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
10554; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10555; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
10556; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
10557; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
10558; GFX12-CU-NEXT:    s_wait_storecnt 0x0
10559; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
10560; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10561; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
10562; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
10563; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
10564; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
10565; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10566; GFX12-CU-NEXT:    s_endpgm
10567    ptr addrspace(1) %out, i32 %in, i32 %old) {
10568entry:
  ; Target of the atomic is i32 element 4 of %out, i.e. 16 bytes past the base
  ; (this is the "offset:16" seen on the atomic in the assertions above).
10569  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory value with %old and, on a match, replace it with %in.
  ; Orderings are listed as success (release) then failure (seq_cst).
10570  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") release seq_cst
  ; Element 0 of the { i32, i1 } result is the value read from memory; the i1
  ; success flag is not used by this test.
10571  %val0 = extractvalue { i32, i1 } %val, 0
  ; Write the value that was read to the base of %out so the result is consumed.
10572  store i32 %val0, ptr addrspace(1) %out, align 4
10573  ret void
10574}
10575
; Kernel under test: a volatile cmpxchg on an addrspace(1) pointer at
; syncscope("agent") whose success ordering is acq_rel and whose failure
; ordering is seq_cst. The value loaded by the cmpxchg is stored back to %out,
; so the result of the atomic is used.
; The per-target assertions that follow are autogenerated (see the note on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
10576define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg(
10577; GFX6-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10578; GFX6:       ; %bb.0: ; %entry
10579; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
10580; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
10581; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
10582; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
10583; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10584; GFX6-NEXT:    s_mov_b32 s12, s5
10585; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10586; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
10587; GFX6-NEXT:    s_mov_b32 s11, -1
10588; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10589; GFX6-NEXT:    s_mov_b32 s5, s12
10590; GFX6-NEXT:    s_mov_b32 s6, s11
10591; GFX6-NEXT:    s_mov_b32 s7, s10
10592; GFX6-NEXT:    v_mov_b32_e32 v0, s9
10593; GFX6-NEXT:    v_mov_b32_e32 v2, s8
10594; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10595; GFX6-NEXT:    v_mov_b32_e32 v1, v2
10596; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10597; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
10598; GFX6-NEXT:    s_waitcnt vmcnt(0)
10599; GFX6-NEXT:    buffer_wbinvl1
10600; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
10601; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10602; GFX6-NEXT:    s_endpgm
10603;
10604; GFX7-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10605; GFX7:       ; %bb.0: ; %entry
10606; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
10607; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
10608; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
10609; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
10610; GFX7-NEXT:    s_mov_b64 s[12:13], 16
10611; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10612; GFX7-NEXT:    s_mov_b32 s6, s4
10613; GFX7-NEXT:    s_mov_b32 s7, s5
10614; GFX7-NEXT:    s_mov_b32 s11, s12
10615; GFX7-NEXT:    s_mov_b32 s10, s13
10616; GFX7-NEXT:    s_add_u32 s6, s6, s11
10617; GFX7-NEXT:    s_addc_u32 s10, s7, s10
10618; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
10619; GFX7-NEXT:    s_mov_b32 s7, s10
10620; GFX7-NEXT:    v_mov_b32_e32 v2, s9
10621; GFX7-NEXT:    v_mov_b32_e32 v0, s8
10622; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10623; GFX7-NEXT:    v_mov_b32_e32 v3, v0
10624; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10625; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10626; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10627; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
10628; GFX7-NEXT:    s_waitcnt vmcnt(0)
10629; GFX7-NEXT:    buffer_wbinvl1_vol
10630; GFX7-NEXT:    v_mov_b32_e32 v0, s4
10631; GFX7-NEXT:    v_mov_b32_e32 v1, s5
10632; GFX7-NEXT:    flat_store_dword v[0:1], v2
10633; GFX7-NEXT:    s_endpgm
10634;
10635; GFX10-WGP-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10636; GFX10-WGP:       ; %bb.0: ; %entry
10637; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10638; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10639; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
10640; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
10641; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10642; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
10643; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
10644; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10645; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
10646; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10647; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10648; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
10649; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
10650; GFX10-WGP-NEXT:    buffer_gl1_inv
10651; GFX10-WGP-NEXT:    buffer_gl0_inv
10652; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10653; GFX10-WGP-NEXT:    s_endpgm
10654;
10655; GFX10-CU-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10656; GFX10-CU:       ; %bb.0: ; %entry
10657; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10658; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10659; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
10660; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
10661; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10662; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
10663; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
10664; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10665; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
10666; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10667; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10668; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
10669; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
10670; GFX10-CU-NEXT:    buffer_gl1_inv
10671; GFX10-CU-NEXT:    buffer_gl0_inv
10672; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10673; GFX10-CU-NEXT:    s_endpgm
10674;
10675; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10676; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10677; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
10678; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
10679; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
10680; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
10681; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10682; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
10683; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10684; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
10685; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
10686; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10687; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
10688; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
10689; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
10690; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
10691; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
10692; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10693; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
10694; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10695; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
10696; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10697; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
10698; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10699; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10700; SKIP-CACHE-INV-NEXT:    s_endpgm
10701;
10702; GFX90A-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10703; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10704; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10705; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10706; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
10707; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
10708; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10709; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
10710; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10711; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10712; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10713; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10714; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
10715; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10716; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
10717; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10718; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10719;
10720; GFX90A-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10721; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10722; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10723; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10724; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
10725; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
10726; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10727; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
10728; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10729; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10730; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10731; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10732; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
10733; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10734; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
10735; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10736; GFX90A-TGSPLIT-NEXT:    s_endpgm
10737;
10738; GFX940-NOTTGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10739; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10740; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10741; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
10742; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
10743; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
10744; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10745; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
10746; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10747; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10748; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10749; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
10750; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10751; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
10752; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10753; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
10754; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10755; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10756;
10757; GFX940-TGSPLIT-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10758; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10759; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10760; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
10761; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
10762; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
10763; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10764; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
10765; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10766; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10767; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
10768; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
10769; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10770; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
10771; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10772; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
10773; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10774; GFX940-TGSPLIT-NEXT:    s_endpgm
10775;
10776; GFX11-WGP-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10777; GFX11-WGP:       ; %bb.0: ; %entry
10778; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
10779; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10780; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
10781; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
10782; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10783; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
10784; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
10785; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10786; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
10787; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10788; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10789; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
10790; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
10791; GFX11-WGP-NEXT:    buffer_gl1_inv
10792; GFX11-WGP-NEXT:    buffer_gl0_inv
10793; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10794; GFX11-WGP-NEXT:    s_endpgm
10795;
10796; GFX11-CU-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10797; GFX11-CU:       ; %bb.0: ; %entry
10798; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
10799; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10800; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
10801; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
10802; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
10803; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
10804; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
10805; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10806; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
10807; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10808; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10809; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
10810; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
10811; GFX11-CU-NEXT:    buffer_gl1_inv
10812; GFX11-CU-NEXT:    buffer_gl0_inv
10813; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10814; GFX11-CU-NEXT:    s_endpgm
10815;
10816; GFX12-WGP-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10817; GFX12-WGP:       ; %bb.0: ; %entry
10818; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
10819; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10820; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
10821; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
10822; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
10823; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
10824; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
10825; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10826; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
10827; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
10828; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
10829; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
10830; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
10831; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10832; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
10833; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
10834; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
10835; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
10836; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10837; GFX12-WGP-NEXT:    s_endpgm
10838;
10839; GFX12-CU-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
10840; GFX12-CU:       ; %bb.0: ; %entry
10841; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
10842; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
10843; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
10844; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
10845; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
10846; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
10847; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
10848; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10849; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
10850; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
10851; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
10852; GFX12-CU-NEXT:    s_wait_storecnt 0x0
10853; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
10854; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
10855; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
10856; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
10857; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
10858; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
10859; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10860; GFX12-CU-NEXT:    s_endpgm
10861    ptr addrspace(1) %out, i32 %in, i32 %old) {
10862entry:
  ; Target of the atomic is i32 element 4 of %out, i.e. 16 bytes past the base
  ; (this is the "offset:16" seen on the atomic in the assertions above).
10863  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory value with %old and, on a match, replace it with %in.
  ; Orderings are listed as success (acq_rel) then failure (seq_cst).
10864  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst
  ; Element 0 of the { i32, i1 } result is the value read from memory; the i1
  ; success flag is not used by this test.
10865  %val0 = extractvalue { i32, i1 } %val, 0
  ; Write the value that was read to the base of %out so the result is consumed.
10866  store i32 %val0, ptr addrspace(1) %out, align 4
10867  ret void
10868}
10869
; Volatile cmpxchg on a global (addrspace 1) pointer at "agent" sync scope with
; seq_cst success ordering and seq_cst failure ordering. The value read by the
; cmpxchg is stored back to %out, so the assertions expect the returning form
; of the atomic. The assertion groups below are autogenerated (see the note at
; the top of the file), one group per llc invocation listed there.
10870define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
10871; GFX6-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
10872; GFX6:       ; %bb.0: ; %entry
10873; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
10874; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
10875; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
10876; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
10877; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10878; GFX6-NEXT:    s_mov_b32 s12, s5
10879; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10880; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
10881; GFX6-NEXT:    s_mov_b32 s11, -1
10882; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10883; GFX6-NEXT:    s_mov_b32 s5, s12
10884; GFX6-NEXT:    s_mov_b32 s6, s11
10885; GFX6-NEXT:    s_mov_b32 s7, s10
10886; GFX6-NEXT:    v_mov_b32_e32 v0, s9
10887; GFX6-NEXT:    v_mov_b32_e32 v2, s8
10888; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10889; GFX6-NEXT:    v_mov_b32_e32 v1, v2
10890; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10891; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
10892; GFX6-NEXT:    s_waitcnt vmcnt(0)
10893; GFX6-NEXT:    buffer_wbinvl1
10894; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
10895; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10896; GFX6-NEXT:    s_endpgm
10897;
10898; GFX7-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
10899; GFX7:       ; %bb.0: ; %entry
10900; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
10901; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
10902; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
10903; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
10904; GFX7-NEXT:    s_mov_b64 s[12:13], 16
10905; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10906; GFX7-NEXT:    s_mov_b32 s6, s4
10907; GFX7-NEXT:    s_mov_b32 s7, s5
10908; GFX7-NEXT:    s_mov_b32 s11, s12
10909; GFX7-NEXT:    s_mov_b32 s10, s13
10910; GFX7-NEXT:    s_add_u32 s6, s6, s11
10911; GFX7-NEXT:    s_addc_u32 s10, s7, s10
10912; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
10913; GFX7-NEXT:    s_mov_b32 s7, s10
10914; GFX7-NEXT:    v_mov_b32_e32 v2, s9
10915; GFX7-NEXT:    v_mov_b32_e32 v0, s8
10916; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
10917; GFX7-NEXT:    v_mov_b32_e32 v3, v0
10918; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10919; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10920; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10921; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
10922; GFX7-NEXT:    s_waitcnt vmcnt(0)
10923; GFX7-NEXT:    buffer_wbinvl1_vol
10924; GFX7-NEXT:    v_mov_b32_e32 v0, s4
10925; GFX7-NEXT:    v_mov_b32_e32 v1, s5
10926; GFX7-NEXT:    flat_store_dword v[0:1], v2
10927; GFX7-NEXT:    s_endpgm
10928;
10929; GFX10-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
10930; GFX10-WGP:       ; %bb.0: ; %entry
10931; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10932; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10933; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
10934; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
10935; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10936; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
10937; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
10938; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10939; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
10940; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10941; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10942; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
10943; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
10944; GFX10-WGP-NEXT:    buffer_gl1_inv
10945; GFX10-WGP-NEXT:    buffer_gl0_inv
10946; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10947; GFX10-WGP-NEXT:    s_endpgm
10948;
10949; GFX10-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
10950; GFX10-CU:       ; %bb.0: ; %entry
10951; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10952; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
10953; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
10954; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
10955; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10956; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
10957; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
10958; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
10959; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
10960; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10961; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10962; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
10963; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
10964; GFX10-CU-NEXT:    buffer_gl1_inv
10965; GFX10-CU-NEXT:    buffer_gl0_inv
10966; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10967; GFX10-CU-NEXT:    s_endpgm
10968;
10969; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
10970; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10971; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
10972; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
10973; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
10974; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
10975; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10976; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
10977; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10978; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
10979; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
10980; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10981; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
10982; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
10983; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
10984; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
10985; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
10986; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
10987; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
10988; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10989; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
10990; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10991; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
10992; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10993; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10994; SKIP-CACHE-INV-NEXT:    s_endpgm
10995;
10996; GFX90A-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
10997; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10998; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10999; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11000; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
11001; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
11002; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11003; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
11004; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11005; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
11006; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
11007; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11008; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
11009; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11010; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
11011; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11012; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11013;
11014; GFX90A-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
11015; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11016; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11017; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11018; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
11019; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
11020; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11021; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
11022; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11023; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
11024; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
11025; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11026; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
11027; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11028; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
11029; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11030; GFX90A-TGSPLIT-NEXT:    s_endpgm
11031;
11032; GFX940-NOTTGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
11033; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11034; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11035; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11036; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
11037; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
11038; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11039; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
11040; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11041; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
11042; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
11043; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
11044; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11045; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
11046; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11047; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
11048; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11049; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11050;
11051; GFX940-TGSPLIT-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
11052; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11053; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11054; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11055; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
11056; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
11057; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11058; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
11059; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11060; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
11061; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
11062; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
11063; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11064; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
11065; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11066; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
11067; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11068; GFX940-TGSPLIT-NEXT:    s_endpgm
11069;
11070; GFX11-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
11071; GFX11-WGP:       ; %bb.0: ; %entry
11072; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11073; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11074; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
11075; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
11076; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11077; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
11078; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
11079; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
11080; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
11081; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11082; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11083; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
11084; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
11085; GFX11-WGP-NEXT:    buffer_gl1_inv
11086; GFX11-WGP-NEXT:    buffer_gl0_inv
11087; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11088; GFX11-WGP-NEXT:    s_endpgm
11089;
11090; GFX11-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
11091; GFX11-CU:       ; %bb.0: ; %entry
11092; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11093; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11094; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
11095; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
11096; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11097; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
11098; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
11099; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
11100; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
11101; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11102; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11103; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
11104; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
11105; GFX11-CU-NEXT:    buffer_gl1_inv
11106; GFX11-CU-NEXT:    buffer_gl0_inv
11107; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11108; GFX11-CU-NEXT:    s_endpgm
11109;
11110; GFX12-WGP-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
11111; GFX12-WGP:       ; %bb.0: ; %entry
11112; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11113; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11114; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
11115; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
11116; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11117; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
11118; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
11119; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
11120; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
11121; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
11122; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
11123; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
11124; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
11125; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
11126; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
11127; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
11128; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11129; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
11130; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11131; GFX12-WGP-NEXT:    s_endpgm
11132;
11133; GFX12-CU-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
11134; GFX12-CU:       ; %bb.0: ; %entry
11135; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11136; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11137; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
11138; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
11139; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11140; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
11141; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
11142; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
11143; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
11144; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
11145; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
11146; GFX12-CU-NEXT:    s_wait_storecnt 0x0
11147; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
11148; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
11149; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
11150; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
11151; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11152; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
11153; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11154; GFX12-CU-NEXT:    s_endpgm
11155    ptr addrspace(1) %out, i32 %in, i32 %old) {
11156entry:
  ; Target the i32 element at index 4 from %out (byte offset 16, matching the
  ; offset of 16 seen in the expected atomic instructions above).
11157  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and, on a match, write %in.
11158  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst
  ; Field 0 of the cmpxchg result is the value that was read from memory.
11159  %val0 = extractvalue { i32, i1 } %val, 0
11160  store i32 %val0, ptr addrspace(1) %out, align 4
11161  ret void
11162}
11163
; Atomic i32 load from global (addrspace 1) memory at "agent-one-as" sync scope
; with unordered ordering; the loaded value is then written to %out with a
; plain store. In every expected output below the load instruction carries no
; cache-control or scope modifier. The assertion groups are autogenerated (see
; the note at the top of the file), one group per llc invocation listed there.
11164define amdgpu_kernel void @global_agent_one_as_unordered_load(
11165; GFX6-LABEL: global_agent_one_as_unordered_load:
11166; GFX6:       ; %bb.0: ; %entry
11167; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
11168; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
11169; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
11170; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11171; GFX6-NEXT:    s_mov_b32 s6, s9
11172; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
11173; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
11174; GFX6-NEXT:    s_mov_b32 s13, -1
11175; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
11176; GFX6-NEXT:    s_mov_b32 s9, s6
11177; GFX6-NEXT:    s_mov_b32 s10, s13
11178; GFX6-NEXT:    s_mov_b32 s11, s12
11179; GFX6-NEXT:    s_mov_b32 s14, s5
11180; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11181; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11182; GFX6-NEXT:    s_mov_b32 s5, s14
11183; GFX6-NEXT:    s_mov_b32 s6, s13
11184; GFX6-NEXT:    s_mov_b32 s7, s12
11185; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0
11186; GFX6-NEXT:    s_waitcnt vmcnt(0)
11187; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
11188; GFX6-NEXT:    s_endpgm
11189;
11190; GFX7-LABEL: global_agent_one_as_unordered_load:
11191; GFX7:       ; %bb.0: ; %entry
11192; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11193; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
11194; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11195; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11196; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11197; GFX7-NEXT:    flat_load_dword v2, v[0:1]
11198; GFX7-NEXT:    v_mov_b32_e32 v0, s4
11199; GFX7-NEXT:    v_mov_b32_e32 v1, s5
11200; GFX7-NEXT:    s_waitcnt vmcnt(0)
11201; GFX7-NEXT:    flat_store_dword v[0:1], v2
11202; GFX7-NEXT:    s_endpgm
11203;
11204; GFX10-WGP-LABEL: global_agent_one_as_unordered_load:
11205; GFX10-WGP:       ; %bb.0: ; %entry
11206; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11207; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11208; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11209; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11210; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7]
11211; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
11212; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
11213; GFX10-WGP-NEXT:    s_endpgm
11214;
11215; GFX10-CU-LABEL: global_agent_one_as_unordered_load:
11216; GFX10-CU:       ; %bb.0: ; %entry
11217; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11218; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11219; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11220; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11221; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7]
11222; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
11223; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
11224; GFX10-CU-NEXT:    s_endpgm
11225;
11226; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_load:
11227; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11228; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
11229; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
11230; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
11231; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11232; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
11233; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11234; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
11235; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
11236; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11237; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
11238; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
11239; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
11240; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
11241; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11242; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11243; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
11244; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
11245; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
11246; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0
11247; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11248; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
11249; SKIP-CACHE-INV-NEXT:    s_endpgm
11250;
11251; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_load:
11252; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11253; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11254; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11255; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11256; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11257; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
11258; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11259; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11260; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11261;
11262; GFX90A-TGSPLIT-LABEL: global_agent_one_as_unordered_load:
11263; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11264; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11265; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11266; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11267; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11268; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
11269; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11270; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11271; GFX90A-TGSPLIT-NEXT:    s_endpgm
11272;
11273; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_load:
11274; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11275; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11276; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
11277; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11278; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11279; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
11280; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11281; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11282; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11283;
11284; GFX940-TGSPLIT-LABEL: global_agent_one_as_unordered_load:
11285; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11286; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11287; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
11288; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11289; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11290; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
11291; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11292; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11293; GFX940-TGSPLIT-NEXT:    s_endpgm
11294;
11295; GFX11-WGP-LABEL: global_agent_one_as_unordered_load:
11296; GFX11-WGP:       ; %bb.0: ; %entry
11297; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11298; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11299; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11300; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11301; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
11302; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
11303; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11304; GFX11-WGP-NEXT:    s_endpgm
11305;
11306; GFX11-CU-LABEL: global_agent_one_as_unordered_load:
11307; GFX11-CU:       ; %bb.0: ; %entry
11308; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11309; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11310; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11311; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11312; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
11313; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
11314; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11315; GFX11-CU-NEXT:    s_endpgm
11316;
11317; GFX12-WGP-LABEL: global_agent_one_as_unordered_load:
11318; GFX12-WGP:       ; %bb.0: ; %entry
11319; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11320; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11321; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11322; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11323; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
11324; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11325; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11326; GFX12-WGP-NEXT:    s_endpgm
11327;
11328; GFX12-CU-LABEL: global_agent_one_as_unordered_load:
11329; GFX12-CU:       ; %bb.0: ; %entry
11330; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11331; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11332; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11333; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11334; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
11335; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11336; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11337; GFX12-CU-NEXT:    s_endpgm
11338    ptr addrspace(1) %in, ptr addrspace(1) %out) {
11339entry:
  ; The atomic load under test; the store that follows is a plain store that
  ; only makes the loaded value observable.
11340  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") unordered, align 4
11341  store i32 %val, ptr addrspace(1) %out
11342  ret void
11343}
11344
; Atomic i32 load from global (addrspace 1) memory at "agent-one-as" sync scope
; with monotonic ordering; the loaded value is then written to %out with a
; plain store. In the expected outputs below the load carries a modifier whose
; spelling depends on the target: glc (gfx600, gfx700, gfx90a, gfx1100),
; glc dlc (gfx1010), sc1 (gfx940) or scope SCOPE_DEV (gfx1200). No extra cache
; invalidate or write-back instruction is expected. The assertion groups are
; autogenerated (see the note at the top of the file), one group per llc
; invocation listed there.
11345define amdgpu_kernel void @global_agent_one_as_monotonic_load(
11346; GFX6-LABEL: global_agent_one_as_monotonic_load:
11347; GFX6:       ; %bb.0: ; %entry
11348; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
11349; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
11350; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
11351; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11352; GFX6-NEXT:    s_mov_b32 s6, s9
11353; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
11354; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
11355; GFX6-NEXT:    s_mov_b32 s13, -1
11356; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
11357; GFX6-NEXT:    s_mov_b32 s9, s6
11358; GFX6-NEXT:    s_mov_b32 s10, s13
11359; GFX6-NEXT:    s_mov_b32 s11, s12
11360; GFX6-NEXT:    s_mov_b32 s14, s5
11361; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11362; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11363; GFX6-NEXT:    s_mov_b32 s5, s14
11364; GFX6-NEXT:    s_mov_b32 s6, s13
11365; GFX6-NEXT:    s_mov_b32 s7, s12
11366; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
11367; GFX6-NEXT:    s_waitcnt vmcnt(0)
11368; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
11369; GFX6-NEXT:    s_endpgm
11370;
11371; GFX7-LABEL: global_agent_one_as_monotonic_load:
11372; GFX7:       ; %bb.0: ; %entry
11373; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11374; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
11375; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11376; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11377; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11378; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
11379; GFX7-NEXT:    v_mov_b32_e32 v0, s4
11380; GFX7-NEXT:    v_mov_b32_e32 v1, s5
11381; GFX7-NEXT:    s_waitcnt vmcnt(0)
11382; GFX7-NEXT:    flat_store_dword v[0:1], v2
11383; GFX7-NEXT:    s_endpgm
11384;
11385; GFX10-WGP-LABEL: global_agent_one_as_monotonic_load:
11386; GFX10-WGP:       ; %bb.0: ; %entry
11387; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11388; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11389; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11390; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11391; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
11392; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
11393; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
11394; GFX10-WGP-NEXT:    s_endpgm
11395;
11396; GFX10-CU-LABEL: global_agent_one_as_monotonic_load:
11397; GFX10-CU:       ; %bb.0: ; %entry
11398; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11399; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11400; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11401; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11402; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
11403; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
11404; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
11405; GFX10-CU-NEXT:    s_endpgm
11406;
11407; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_load:
11408; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11409; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
11410; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
11411; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
11412; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11413; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
11414; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11415; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
11416; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
11417; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11418; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
11419; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
11420; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
11421; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
11422; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11423; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11424; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
11425; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
11426; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
11427; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
11428; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11429; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
11430; SKIP-CACHE-INV-NEXT:    s_endpgm
11431;
11432; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_load:
11433; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11434; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11435; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11436; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11437; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11438; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
11439; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11440; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11441; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11442;
11443; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_load:
11444; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11445; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11446; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11447; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11448; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11449; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
11450; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11451; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11452; GFX90A-TGSPLIT-NEXT:    s_endpgm
11453;
11454; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_load:
11455; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11456; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11457; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
11458; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11459; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11460; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
11461; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11462; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11463; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11464;
11465; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_load:
11466; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11467; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11468; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
11469; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11470; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11471; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
11472; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11473; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11474; GFX940-TGSPLIT-NEXT:    s_endpgm
11475;
11476; GFX11-WGP-LABEL: global_agent_one_as_monotonic_load:
11477; GFX11-WGP:       ; %bb.0: ; %entry
11478; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11479; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11480; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11481; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11482; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
11483; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
11484; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11485; GFX11-WGP-NEXT:    s_endpgm
11486;
11487; GFX11-CU-LABEL: global_agent_one_as_monotonic_load:
11488; GFX11-CU:       ; %bb.0: ; %entry
11489; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11490; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11491; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11492; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11493; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
11494; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
11495; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11496; GFX11-CU-NEXT:    s_endpgm
11497;
11498; GFX12-WGP-LABEL: global_agent_one_as_monotonic_load:
11499; GFX12-WGP:       ; %bb.0: ; %entry
11500; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11501; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11502; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11503; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11504; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
11505; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11506; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11507; GFX12-WGP-NEXT:    s_endpgm
11508;
11509; GFX12-CU-LABEL: global_agent_one_as_monotonic_load:
11510; GFX12-CU:       ; %bb.0: ; %entry
11511; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11512; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11513; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11514; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11515; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
11516; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11517; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11518; GFX12-CU-NEXT:    s_endpgm
11519    ptr addrspace(1) %in, ptr addrspace(1) %out) {
11520entry:
  ; The atomic load under test; the store that follows is a plain store that
  ; only makes the loaded value observable.
11521  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") monotonic, align 4
11522  store i32 %val, ptr addrspace(1) %out
11523  ret void
11524}
11525
; Test: acquire atomic i32 load from global memory (addrspace 1) at
; syncscope "agent-one-as"; the loaded value is then written to %out with a
; plain (non-atomic) store.
;
; What the expected output below pins down, per run configuration:
;   * the load itself carries glc (gfx600/gfx700/gfx90a/gfx1100 and the
;     skip-cache-invalidations run), glc dlc (gfx1010), sc1 (gfx940) or
;     scope:SCOPE_DEV (gfx1200);
;   * a wait on the load (s_waitcnt vmcnt(0), or s_wait_loadcnt 0x0 on
;     gfx1200) directly follows it;
;   * a cache invalidate follows that wait: buffer_wbinvl1 (gfx600),
;     buffer_wbinvl1_vol (gfx700, gfx90a), buffer_gl1_inv + buffer_gl0_inv
;     (gfx1010, gfx1100), buffer_inv sc1 (gfx940), global_inv scope:SCOPE_DEV
;     (gfx1200);
;   * the run with -amdgcn-skip-cache-invalidations expects no invalidate
;     between the wait and the store.
; The CU-mode and tgsplit variants expect the same sequence as their
; counterparts for this function.
11526define amdgpu_kernel void @global_agent_one_as_acquire_load(
11527; GFX6-LABEL: global_agent_one_as_acquire_load:
11528; GFX6:       ; %bb.0: ; %entry
11529; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
11530; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
11531; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
11532; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11533; GFX6-NEXT:    s_mov_b32 s6, s9
11534; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
11535; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
11536; GFX6-NEXT:    s_mov_b32 s13, -1
11537; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
11538; GFX6-NEXT:    s_mov_b32 s9, s6
11539; GFX6-NEXT:    s_mov_b32 s10, s13
11540; GFX6-NEXT:    s_mov_b32 s11, s12
11541; GFX6-NEXT:    s_mov_b32 s14, s5
11542; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11543; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11544; GFX6-NEXT:    s_mov_b32 s5, s14
11545; GFX6-NEXT:    s_mov_b32 s6, s13
11546; GFX6-NEXT:    s_mov_b32 s7, s12
11547; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
11548; GFX6-NEXT:    s_waitcnt vmcnt(0)
11549; GFX6-NEXT:    buffer_wbinvl1
11550; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
11551; GFX6-NEXT:    s_endpgm
11552;
11553; GFX7-LABEL: global_agent_one_as_acquire_load:
11554; GFX7:       ; %bb.0: ; %entry
11555; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11556; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
11557; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11558; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11559; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11560; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
11561; GFX7-NEXT:    s_waitcnt vmcnt(0)
11562; GFX7-NEXT:    buffer_wbinvl1_vol
11563; GFX7-NEXT:    v_mov_b32_e32 v0, s4
11564; GFX7-NEXT:    v_mov_b32_e32 v1, s5
11565; GFX7-NEXT:    flat_store_dword v[0:1], v2
11566; GFX7-NEXT:    s_endpgm
11567;
11568; GFX10-WGP-LABEL: global_agent_one_as_acquire_load:
11569; GFX10-WGP:       ; %bb.0: ; %entry
11570; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11571; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11572; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11573; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11574; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
11575; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
11576; GFX10-WGP-NEXT:    buffer_gl1_inv
11577; GFX10-WGP-NEXT:    buffer_gl0_inv
11578; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
11579; GFX10-WGP-NEXT:    s_endpgm
11580;
11581; GFX10-CU-LABEL: global_agent_one_as_acquire_load:
11582; GFX10-CU:       ; %bb.0: ; %entry
11583; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11584; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11585; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11586; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11587; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
11588; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
11589; GFX10-CU-NEXT:    buffer_gl1_inv
11590; GFX10-CU-NEXT:    buffer_gl0_inv
11591; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
11592; GFX10-CU-NEXT:    s_endpgm
11593;
11594; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_load:
11595; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11596; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
11597; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
11598; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
11599; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11600; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
11601; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11602; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
11603; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
11604; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11605; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
11606; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
11607; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
11608; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
11609; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11610; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11611; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
11612; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
11613; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
11614; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
11615; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11616; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
11617; SKIP-CACHE-INV-NEXT:    s_endpgm
11618;
11619; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_load:
11620; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11621; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11622; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11623; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11624; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11625; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
11626; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11627; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
11628; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11629; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11630;
11631; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_load:
11632; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11633; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11634; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11635; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11636; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11637; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
11638; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11639; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
11640; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11641; GFX90A-TGSPLIT-NEXT:    s_endpgm
11642;
11643; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_load:
11644; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11645; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11646; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
11647; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11648; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11649; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
11650; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11651; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
11652; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11653; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11654;
11655; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_load:
11656; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11657; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11658; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
11659; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11660; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11661; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
11662; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11663; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
11664; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11665; GFX940-TGSPLIT-NEXT:    s_endpgm
11666;
11667; GFX11-WGP-LABEL: global_agent_one_as_acquire_load:
11668; GFX11-WGP:       ; %bb.0: ; %entry
11669; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11670; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11671; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11672; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11673; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
11674; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
11675; GFX11-WGP-NEXT:    buffer_gl1_inv
11676; GFX11-WGP-NEXT:    buffer_gl0_inv
11677; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11678; GFX11-WGP-NEXT:    s_endpgm
11679;
11680; GFX11-CU-LABEL: global_agent_one_as_acquire_load:
11681; GFX11-CU:       ; %bb.0: ; %entry
11682; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11683; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11684; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11685; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11686; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
11687; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
11688; GFX11-CU-NEXT:    buffer_gl1_inv
11689; GFX11-CU-NEXT:    buffer_gl0_inv
11690; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11691; GFX11-CU-NEXT:    s_endpgm
11692;
11693; GFX12-WGP-LABEL: global_agent_one_as_acquire_load:
11694; GFX12-WGP:       ; %bb.0: ; %entry
11695; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11696; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11697; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11698; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11699; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
11700; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11701; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
11702; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11703; GFX12-WGP-NEXT:    s_endpgm
11704;
11705; GFX12-CU-LABEL: global_agent_one_as_acquire_load:
11706; GFX12-CU:       ; %bb.0: ; %entry
11707; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11708; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11709; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11710; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11711; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
11712; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11713; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
11714; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11715; GFX12-CU-NEXT:    s_endpgm
11716    ptr addrspace(1) %in, ptr addrspace(1) %out) {
11717entry:
  ; The atomic load is the operation under test; the store below is a plain
  ; non-atomic store that only consumes the loaded value.
11718  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") acquire, align 4
11719  store i32 %val, ptr addrspace(1) %out
11720  ret void
11721}
11722
; Test: seq_cst atomic i32 load from global memory (addrspace 1) at
; syncscope "agent-one-as"; the loaded value is then written to %out with a
; plain (non-atomic) store.
;
; What the expected output below pins down, per run configuration:
;   * waits placed BEFORE the load:
;       - gfx600, gfx700 and the skip-cache-invalidations run expect
;         s_waitcnt vmcnt(0);
;       - gfx90a and gfx940 expect s_waitcnt vmcnt(0) lgkmcnt(0);
;       - gfx1010 and gfx1100 expect s_waitcnt vmcnt(0) lgkmcnt(0) followed by
;         s_waitcnt_vscnt null, 0x0;
;       - gfx1200 expects s_wait_bvhcnt, s_wait_samplecnt, s_wait_loadcnt,
;         s_wait_storecnt and s_wait_kmcnt, each with 0x0;
;   * the load itself carries glc, glc dlc, sc1 or scope:SCOPE_DEV depending on
;     the target;
;   * a wait on the load follows it (s_waitcnt vmcnt(0); on gfx1200
;     s_wait_bvhcnt, s_wait_samplecnt and s_wait_loadcnt), and then a cache
;     invalidate: buffer_wbinvl1 (gfx600), buffer_wbinvl1_vol (gfx700, gfx90a),
;     buffer_gl1_inv + buffer_gl0_inv (gfx1010, gfx1100), buffer_inv sc1
;     (gfx940), global_inv scope:SCOPE_DEV (gfx1200);
;   * the run with -amdgcn-skip-cache-invalidations keeps the waits but expects
;     no invalidate before the store.
11723define amdgpu_kernel void @global_agent_one_as_seq_cst_load(
11724; GFX6-LABEL: global_agent_one_as_seq_cst_load:
11725; GFX6:       ; %bb.0: ; %entry
11726; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
11727; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
11728; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
11729; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11730; GFX6-NEXT:    s_mov_b32 s6, s9
11731; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
11732; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
11733; GFX6-NEXT:    s_mov_b32 s13, -1
11734; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
11735; GFX6-NEXT:    s_mov_b32 s9, s6
11736; GFX6-NEXT:    s_mov_b32 s10, s13
11737; GFX6-NEXT:    s_mov_b32 s11, s12
11738; GFX6-NEXT:    s_mov_b32 s14, s5
11739; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11740; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11741; GFX6-NEXT:    s_mov_b32 s5, s14
11742; GFX6-NEXT:    s_mov_b32 s6, s13
11743; GFX6-NEXT:    s_mov_b32 s7, s12
11744; GFX6-NEXT:    s_waitcnt vmcnt(0)
11745; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
11746; GFX6-NEXT:    s_waitcnt vmcnt(0)
11747; GFX6-NEXT:    buffer_wbinvl1
11748; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
11749; GFX6-NEXT:    s_endpgm
11750;
11751; GFX7-LABEL: global_agent_one_as_seq_cst_load:
11752; GFX7:       ; %bb.0: ; %entry
11753; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11754; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
11755; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11756; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11757; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11758; GFX7-NEXT:    s_waitcnt vmcnt(0)
11759; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
11760; GFX7-NEXT:    s_waitcnt vmcnt(0)
11761; GFX7-NEXT:    buffer_wbinvl1_vol
11762; GFX7-NEXT:    v_mov_b32_e32 v0, s4
11763; GFX7-NEXT:    v_mov_b32_e32 v1, s5
11764; GFX7-NEXT:    flat_store_dword v[0:1], v2
11765; GFX7-NEXT:    s_endpgm
11766;
11767; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_load:
11768; GFX10-WGP:       ; %bb.0: ; %entry
11769; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11770; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11771; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11772; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11773; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11774; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
11775; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
11776; GFX10-WGP-NEXT:    buffer_gl1_inv
11777; GFX10-WGP-NEXT:    buffer_gl0_inv
11778; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
11779; GFX10-WGP-NEXT:    s_endpgm
11780;
11781; GFX10-CU-LABEL: global_agent_one_as_seq_cst_load:
11782; GFX10-CU:       ; %bb.0: ; %entry
11783; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11784; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11785; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11786; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11787; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11788; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
11789; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
11790; GFX10-CU-NEXT:    buffer_gl1_inv
11791; GFX10-CU-NEXT:    buffer_gl0_inv
11792; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
11793; GFX10-CU-NEXT:    s_endpgm
11794;
11795; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_load:
11796; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11797; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
11798; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
11799; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
11800; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11801; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
11802; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11803; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
11804; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
11805; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11806; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
11807; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
11808; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
11809; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
11810; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11811; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11812; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
11813; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
11814; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
11815; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11816; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
11817; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11818; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
11819; SKIP-CACHE-INV-NEXT:    s_endpgm
11820;
11821; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_load:
11822; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11823; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11824; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11825; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11826; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11827; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
11828; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11829; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
11830; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11831; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11832;
11833; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_load:
11834; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11835; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11836; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11837; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11838; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11839; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
11840; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11841; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
11842; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11843; GFX90A-TGSPLIT-NEXT:    s_endpgm
11844;
11845; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_load:
11846; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11847; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11848; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
11849; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11850; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11851; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
11852; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11853; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
11854; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11855; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11856;
11857; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_load:
11858; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11859; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11860; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
11861; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11862; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11863; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc1
11864; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11865; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
11866; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11867; GFX940-TGSPLIT-NEXT:    s_endpgm
11868;
11869; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_load:
11870; GFX11-WGP:       ; %bb.0: ; %entry
11871; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11872; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11873; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11874; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11875; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11876; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
11877; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
11878; GFX11-WGP-NEXT:    buffer_gl1_inv
11879; GFX11-WGP-NEXT:    buffer_gl0_inv
11880; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11881; GFX11-WGP-NEXT:    s_endpgm
11882;
11883; GFX11-CU-LABEL: global_agent_one_as_seq_cst_load:
11884; GFX11-CU:       ; %bb.0: ; %entry
11885; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11886; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11887; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11888; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
11889; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11890; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
11891; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
11892; GFX11-CU-NEXT:    buffer_gl1_inv
11893; GFX11-CU-NEXT:    buffer_gl0_inv
11894; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11895; GFX11-CU-NEXT:    s_endpgm
11896;
11897; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_load:
11898; GFX12-WGP:       ; %bb.0: ; %entry
11899; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11900; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11901; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11902; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
11903; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
11904; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11905; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
11906; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11907; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
11908; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
11909; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
11910; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11911; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
11912; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11913; GFX12-WGP-NEXT:    s_endpgm
11914;
11915; GFX12-CU-LABEL: global_agent_one_as_seq_cst_load:
11916; GFX12-CU:       ; %bb.0: ; %entry
11917; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11918; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
11919; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11920; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
11921; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
11922; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11923; GFX12-CU-NEXT:    s_wait_storecnt 0x0
11924; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11925; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV
11926; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
11927; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
11928; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11929; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
11930; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11931; GFX12-CU-NEXT:    s_endpgm
11932    ptr addrspace(1) %in, ptr addrspace(1) %out) {
11933entry:
  ; The atomic load is the operation under test; the store below is a plain
  ; non-atomic store that only consumes the loaded value.
11934  %val = load atomic i32, ptr addrspace(1) %in syncscope("agent-one-as") seq_cst, align 4
11935  store i32 %val, ptr addrspace(1) %out
11936  ret void
11937}
11938
; Test: unordered atomic i32 store of the kernel argument %in to global memory
; (addrspace 1) at %out, with syncscope "agent-one-as".
;
; What the expected output below pins down, per run configuration:
;   * the only wait expected is the one on the scalar argument loads
;     (s_waitcnt lgkmcnt(0), or s_wait_kmcnt 0x0 on gfx1200); nothing is
;     expected between the store and s_endpgm;
;   * no cache writeback/invalidate instruction is expected anywhere;
;   * the store carries no cache-policy modifier, except on gfx940 where the
;     expected form is "global_store_dword ... sc0 sc1".
; The CU-mode and tgsplit variants expect the same sequence as their
; counterparts for this function.
11939define amdgpu_kernel void @global_agent_one_as_unordered_store(
11940; GFX6-LABEL: global_agent_one_as_unordered_store:
11941; GFX6:       ; %bb.0: ; %entry
11942; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
11943; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
11944; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
11945; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11946; GFX6-NEXT:    s_mov_b32 s11, s5
11947; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11948; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
11949; GFX6-NEXT:    s_mov_b32 s10, -1
11950; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11951; GFX6-NEXT:    s_mov_b32 s5, s11
11952; GFX6-NEXT:    s_mov_b32 s6, s10
11953; GFX6-NEXT:    s_mov_b32 s7, s9
11954; GFX6-NEXT:    v_mov_b32_e32 v0, s8
11955; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
11956; GFX6-NEXT:    s_endpgm
11957;
11958; GFX7-LABEL: global_agent_one_as_unordered_store:
11959; GFX7:       ; %bb.0: ; %entry
11960; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
11961; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
11962; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11963; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11964; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11965; GFX7-NEXT:    v_mov_b32_e32 v2, s4
11966; GFX7-NEXT:    flat_store_dword v[0:1], v2
11967; GFX7-NEXT:    s_endpgm
11968;
11969; GFX10-WGP-LABEL: global_agent_one_as_unordered_store:
11970; GFX10-WGP:       ; %bb.0: ; %entry
11971; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
11972; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11973; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11974; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11975; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
11976; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
11977; GFX10-WGP-NEXT:    s_endpgm
11978;
11979; GFX10-CU-LABEL: global_agent_one_as_unordered_store:
11980; GFX10-CU:       ; %bb.0: ; %entry
11981; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
11982; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11983; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11984; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11985; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
11986; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
11987; GFX10-CU-NEXT:    s_endpgm
11988;
11989; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_store:
11990; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11991; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
11992; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
11993; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
11994; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11995; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
11996; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11997; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
11998; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
11999; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12000; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12001; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12002; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12003; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12004; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
12005; SKIP-CACHE-INV-NEXT:    s_endpgm
12006;
12007; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_store:
12008; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12009; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
12010; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12011; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12012; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12013; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12014; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12015; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12016;
12017; GFX90A-TGSPLIT-LABEL: global_agent_one_as_unordered_store:
12018; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12019; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
12020; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12021; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12022; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12023; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12024; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12025; GFX90A-TGSPLIT-NEXT:    s_endpgm
12026;
12027; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_unordered_store:
12028; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12029; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
12030; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
12031; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12032; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12033; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12034; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12035; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12036;
12037; GFX940-TGSPLIT-LABEL: global_agent_one_as_unordered_store:
12038; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12039; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
12040; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
12041; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12042; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12043; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12044; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12045; GFX940-TGSPLIT-NEXT:    s_endpgm
12046;
12047; GFX11-WGP-LABEL: global_agent_one_as_unordered_store:
12048; GFX11-WGP:       ; %bb.0: ; %entry
12049; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
12050; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12051; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12052; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12053; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12054; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12055; GFX11-WGP-NEXT:    s_endpgm
12056;
12057; GFX11-CU-LABEL: global_agent_one_as_unordered_store:
12058; GFX11-CU:       ; %bb.0: ; %entry
12059; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
12060; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12061; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12062; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12063; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12064; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12065; GFX11-CU-NEXT:    s_endpgm
12066;
12067; GFX12-WGP-LABEL: global_agent_one_as_unordered_store:
12068; GFX12-WGP:       ; %bb.0: ; %entry
12069; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
12070; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12071; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12072; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12073; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12074; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12075; GFX12-WGP-NEXT:    s_endpgm
12076;
12077; GFX12-CU-LABEL: global_agent_one_as_unordered_store:
12078; GFX12-CU:       ; %bb.0: ; %entry
12079; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
12080; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12081; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12082; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12083; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12084; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12085; GFX12-CU-NEXT:    s_endpgm
12086    i32 %in, ptr addrspace(1) %out) {
12087entry:
  ; Single operation under test: the atomic store of the i32 argument.
12088  store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") unordered, align 4
12089  ret void
12090}
12091
12092define amdgpu_kernel void @global_agent_one_as_monotonic_store(
12093; GFX6-LABEL: global_agent_one_as_monotonic_store:
12094; GFX6:       ; %bb.0: ; %entry
12095; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
12096; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
12097; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
12098; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12099; GFX6-NEXT:    s_mov_b32 s11, s5
12100; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12101; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12102; GFX6-NEXT:    s_mov_b32 s10, -1
12103; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12104; GFX6-NEXT:    s_mov_b32 s5, s11
12105; GFX6-NEXT:    s_mov_b32 s6, s10
12106; GFX6-NEXT:    s_mov_b32 s7, s9
12107; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12108; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
12109; GFX6-NEXT:    s_endpgm
12110;
12111; GFX7-LABEL: global_agent_one_as_monotonic_store:
12112; GFX7:       ; %bb.0: ; %entry
12113; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
12114; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
12115; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12116; GFX7-NEXT:    v_mov_b32_e32 v0, s6
12117; GFX7-NEXT:    v_mov_b32_e32 v1, s7
12118; GFX7-NEXT:    v_mov_b32_e32 v2, s4
12119; GFX7-NEXT:    flat_store_dword v[0:1], v2
12120; GFX7-NEXT:    s_endpgm
12121;
12122; GFX10-WGP-LABEL: global_agent_one_as_monotonic_store:
12123; GFX10-WGP:       ; %bb.0: ; %entry
12124; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
12125; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12126; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12127; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12128; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12129; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
12130; GFX10-WGP-NEXT:    s_endpgm
12131;
12132; GFX10-CU-LABEL: global_agent_one_as_monotonic_store:
12133; GFX10-CU:       ; %bb.0: ; %entry
12134; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
12135; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12136; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12137; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12138; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12139; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
12140; GFX10-CU-NEXT:    s_endpgm
12141;
12142; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_store:
12143; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12144; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
12145; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
12146; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
12147; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12148; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12149; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12150; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12151; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12152; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12153; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12154; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12155; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12156; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12157; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
12158; SKIP-CACHE-INV-NEXT:    s_endpgm
12159;
12160; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_store:
12161; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12162; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
12163; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12164; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12165; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12166; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12167; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12168; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12169;
12170; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_store:
12171; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12172; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
12173; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12174; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12175; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12176; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12177; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12178; GFX90A-TGSPLIT-NEXT:    s_endpgm
12179;
12180; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_store:
12181; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12182; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
12183; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
12184; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12185; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12186; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12187; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12188; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12189;
12190; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_store:
12191; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12192; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
12193; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
12194; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12195; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12196; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12197; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12198; GFX940-TGSPLIT-NEXT:    s_endpgm
12199;
12200; GFX11-WGP-LABEL: global_agent_one_as_monotonic_store:
12201; GFX11-WGP:       ; %bb.0: ; %entry
12202; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
12203; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12204; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12205; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12206; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12207; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12208; GFX11-WGP-NEXT:    s_endpgm
12209;
12210; GFX11-CU-LABEL: global_agent_one_as_monotonic_store:
12211; GFX11-CU:       ; %bb.0: ; %entry
12212; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
12213; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12214; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12215; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12216; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12217; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12218; GFX11-CU-NEXT:    s_endpgm
12219;
12220; GFX12-WGP-LABEL: global_agent_one_as_monotonic_store:
12221; GFX12-WGP:       ; %bb.0: ; %entry
12222; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
12223; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12224; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12225; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12226; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12227; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12228; GFX12-WGP-NEXT:    s_endpgm
12229;
12230; GFX12-CU-LABEL: global_agent_one_as_monotonic_store:
12231; GFX12-CU:       ; %bb.0: ; %entry
12232; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
12233; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12234; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12235; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12236; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12237; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12238; GFX12-CU-NEXT:    s_endpgm
12239    i32 %in, ptr addrspace(1) %out) {
12240entry:
12241  store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") monotonic, align 4
12242  ret void
12243}
12244
; Release atomic store of %in to the global (addrspace 1) pointer %out at
; syncscope "agent-one-as".
; Instruction sequences pinned by the checks below, per llc invocation:
;  - gfx600, gfx700, gfx90a and the amdpal skip-cache-invalidations run: one
;    "s_waitcnt vmcnt(0)" directly before the store.
;  - gfx1010 and gfx1100: "s_waitcnt vmcnt(0)" followed by
;    "s_waitcnt_vscnt null, 0x0" before the store.
;  - gfx940: "buffer_wbl2 sc1" then "s_waitcnt vmcnt(0)"; the store itself
;    carries "sc0 sc1".
;  - gfx1200: waits on bvhcnt, samplecnt, loadcnt and storecnt before the
;    store, which carries "scope:SCOPE_DEV".
; The cumode and tgsplit variants are checked against the same sequence as
; their base target.
; NOTE(review): the waits are presumably the release half of the ordering for
; earlier memory operations - confirm against the AMDGPU memory model tables.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
12245define amdgpu_kernel void @global_agent_one_as_release_store(
12246; GFX6-LABEL: global_agent_one_as_release_store:
12247; GFX6:       ; %bb.0: ; %entry
12248; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
12249; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
12250; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
12251; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12252; GFX6-NEXT:    s_mov_b32 s11, s5
12253; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12254; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12255; GFX6-NEXT:    s_mov_b32 s10, -1
12256; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12257; GFX6-NEXT:    s_mov_b32 s5, s11
12258; GFX6-NEXT:    s_mov_b32 s6, s10
12259; GFX6-NEXT:    s_mov_b32 s7, s9
12260; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12261; GFX6-NEXT:    s_waitcnt vmcnt(0)
12262; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
12263; GFX6-NEXT:    s_endpgm
12264;
12265; GFX7-LABEL: global_agent_one_as_release_store:
12266; GFX7:       ; %bb.0: ; %entry
12267; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
12268; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
12269; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12270; GFX7-NEXT:    v_mov_b32_e32 v0, s6
12271; GFX7-NEXT:    v_mov_b32_e32 v1, s7
12272; GFX7-NEXT:    v_mov_b32_e32 v2, s4
12273; GFX7-NEXT:    s_waitcnt vmcnt(0)
12274; GFX7-NEXT:    flat_store_dword v[0:1], v2
12275; GFX7-NEXT:    s_endpgm
12276;
12277; GFX10-WGP-LABEL: global_agent_one_as_release_store:
12278; GFX10-WGP:       ; %bb.0: ; %entry
12279; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
12280; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12281; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12282; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12283; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12284; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12285; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12286; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
12287; GFX10-WGP-NEXT:    s_endpgm
12288;
12289; GFX10-CU-LABEL: global_agent_one_as_release_store:
12290; GFX10-CU:       ; %bb.0: ; %entry
12291; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
12292; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12293; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12294; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12295; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12296; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12297; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12298; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
12299; GFX10-CU-NEXT:    s_endpgm
12300;
12301; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_store:
12302; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12303; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
12304; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
12305; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
12306; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12307; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12308; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12309; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12310; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12311; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12312; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12313; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12314; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12315; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12316; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12317; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
12318; SKIP-CACHE-INV-NEXT:    s_endpgm
12319;
12320; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_store:
12321; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12322; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
12323; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12324; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12325; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12326; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12327; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12328; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12329; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12330;
12331; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_store:
12332; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12333; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
12334; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12335; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12336; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12337; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12338; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12339; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12340; GFX90A-TGSPLIT-NEXT:    s_endpgm
12341;
12342; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_store:
12343; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12344; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
12345; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
12346; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12347; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12348; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12349; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
12350; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12351; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12352; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12353;
12354; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_store:
12355; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12356; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
12357; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
12358; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12359; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12360; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12361; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
12362; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12363; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12364; GFX940-TGSPLIT-NEXT:    s_endpgm
12365;
12366; GFX11-WGP-LABEL: global_agent_one_as_release_store:
12367; GFX11-WGP:       ; %bb.0: ; %entry
12368; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
12369; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12370; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12371; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12372; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12373; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12374; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12375; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12376; GFX11-WGP-NEXT:    s_endpgm
12377;
12378; GFX11-CU-LABEL: global_agent_one_as_release_store:
12379; GFX11-CU:       ; %bb.0: ; %entry
12380; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
12381; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12382; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12383; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12384; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12385; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12386; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12387; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12388; GFX11-CU-NEXT:    s_endpgm
12389;
12390; GFX12-WGP-LABEL: global_agent_one_as_release_store:
12391; GFX12-WGP:       ; %bb.0: ; %entry
12392; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
12393; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12394; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12395; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12396; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12397; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
12398; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
12399; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12400; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12401; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12402; GFX12-WGP-NEXT:    s_endpgm
12403;
12404; GFX12-CU-LABEL: global_agent_one_as_release_store:
12405; GFX12-CU:       ; %bb.0: ; %entry
12406; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
12407; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12408; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12409; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12410; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12411; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
12412; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
12413; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
12414; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12415; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12416; GFX12-CU-NEXT:    s_endpgm
12417    i32 %in, ptr addrspace(1) %out) {
12418entry:
12419  store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") release, align 4
12420  ret void
12421}
12422
; Sequentially consistent atomic store of %in to the global (addrspace 1)
; pointer %out at syncscope "agent-one-as".
; Instruction sequences pinned by the checks below, per llc invocation:
;  - gfx600, gfx700, gfx90a and the amdpal skip-cache-invalidations run: one
;    "s_waitcnt vmcnt(0)" directly before the store.
;  - gfx1010 and gfx1100: "s_waitcnt vmcnt(0)" followed by
;    "s_waitcnt_vscnt null, 0x0" before the store.
;  - gfx940: "buffer_wbl2 sc1" then "s_waitcnt vmcnt(0)"; the store itself
;    carries "sc0 sc1".
;  - gfx1200: waits on bvhcnt, samplecnt, loadcnt and storecnt before the
;    store, which carries "scope:SCOPE_DEV".
; No instruction other than s_endpgm is expected after the store on any target.
; The cumode and tgsplit variants are checked against the same sequence as
; their base target.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
12423define amdgpu_kernel void @global_agent_one_as_seq_cst_store(
12424; GFX6-LABEL: global_agent_one_as_seq_cst_store:
12425; GFX6:       ; %bb.0: ; %entry
12426; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
12427; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
12428; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
12429; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12430; GFX6-NEXT:    s_mov_b32 s11, s5
12431; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12432; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12433; GFX6-NEXT:    s_mov_b32 s10, -1
12434; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12435; GFX6-NEXT:    s_mov_b32 s5, s11
12436; GFX6-NEXT:    s_mov_b32 s6, s10
12437; GFX6-NEXT:    s_mov_b32 s7, s9
12438; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12439; GFX6-NEXT:    s_waitcnt vmcnt(0)
12440; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
12441; GFX6-NEXT:    s_endpgm
12442;
12443; GFX7-LABEL: global_agent_one_as_seq_cst_store:
12444; GFX7:       ; %bb.0: ; %entry
12445; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
12446; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
12447; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12448; GFX7-NEXT:    v_mov_b32_e32 v0, s6
12449; GFX7-NEXT:    v_mov_b32_e32 v1, s7
12450; GFX7-NEXT:    v_mov_b32_e32 v2, s4
12451; GFX7-NEXT:    s_waitcnt vmcnt(0)
12452; GFX7-NEXT:    flat_store_dword v[0:1], v2
12453; GFX7-NEXT:    s_endpgm
12454;
12455; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_store:
12456; GFX10-WGP:       ; %bb.0: ; %entry
12457; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
12458; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12459; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12460; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12461; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12462; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12463; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12464; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
12465; GFX10-WGP-NEXT:    s_endpgm
12466;
12467; GFX10-CU-LABEL: global_agent_one_as_seq_cst_store:
12468; GFX10-CU:       ; %bb.0: ; %entry
12469; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
12470; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12471; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12472; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12473; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12474; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12475; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12476; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
12477; GFX10-CU-NEXT:    s_endpgm
12478;
12479; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_store:
12480; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12481; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
12482; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
12483; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
12484; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12485; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12486; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12487; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12488; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12489; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12490; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12491; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12492; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12493; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12494; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12495; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
12496; SKIP-CACHE-INV-NEXT:    s_endpgm
12497;
12498; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_store:
12499; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12500; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
12501; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12502; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12503; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12504; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12505; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12506; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12507; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12508;
12509; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_store:
12510; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12511; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
12512; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
12513; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12514; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12515; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12516; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12517; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12518; GFX90A-TGSPLIT-NEXT:    s_endpgm
12519;
12520; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_store:
12521; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12522; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
12523; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
12524; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12525; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12526; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12527; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
12528; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12529; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12530; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12531;
12532; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_store:
12533; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12534; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
12535; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
12536; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12537; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12538; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12539; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
12540; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12541; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12542; GFX940-TGSPLIT-NEXT:    s_endpgm
12543;
12544; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_store:
12545; GFX11-WGP:       ; %bb.0: ; %entry
12546; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
12547; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12548; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12549; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12550; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12551; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12552; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12553; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12554; GFX11-WGP-NEXT:    s_endpgm
12555;
12556; GFX11-CU-LABEL: global_agent_one_as_seq_cst_store:
12557; GFX11-CU:       ; %bb.0: ; %entry
12558; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
12559; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12560; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12561; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12562; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12563; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12564; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12565; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12566; GFX11-CU-NEXT:    s_endpgm
12567;
12568; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_store:
12569; GFX12-WGP:       ; %bb.0: ; %entry
12570; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
12571; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12572; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12573; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12574; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12575; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
12576; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
12577; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12578; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12579; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12580; GFX12-WGP-NEXT:    s_endpgm
12581;
12582; GFX12-CU-LABEL: global_agent_one_as_seq_cst_store:
12583; GFX12-CU:       ; %bb.0: ; %entry
12584; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
12585; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
12586; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12587; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12588; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12589; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
12590; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
12591; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
12592; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12593; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12594; GFX12-CU-NEXT:    s_endpgm
12595    i32 %in, ptr addrspace(1) %out) {
12596entry:
12597  store atomic i32 %in, ptr addrspace(1) %out syncscope("agent-one-as") seq_cst, align 4
12598  ret void
12599}
12600
; Monotonic volatile "atomicrmw xchg" of %in into the global (addrspace 1)
; pointer %out at syncscope "agent-one-as". The result %val is never used.
; Note the argument order here is (%out, %in), so the pointer is loaded from
; kernarg offset 0x0 and the value from the following slot.
; Instruction sequences pinned by the checks below, per llc invocation:
;  - gfx600 and the amdpal skip-cache-invalidations run: buffer_atomic_swap.
;  - gfx700: flat_atomic_swap.
;  - gfx1010, gfx90a, gfx940: global_atomic_swap with no extra modifiers.
;  - gfx1100: global_atomic_swap_b32 with no extra modifiers.
;  - gfx1200: global_atomic_swap_b32 carrying "scope:SCOPE_DEV".
; On every target the only wait is the one for the kernarg loads; no wait or
; cache-maintenance instruction is expected around the swap itself.
; The cumode and tgsplit variants are checked against the same sequence as
; their base target.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing by hand.
12601define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw(
12602; GFX6-LABEL: global_agent_one_as_monotonic_atomicrmw:
12603; GFX6:       ; %bb.0: ; %entry
12604; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12605; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
12606; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12607; GFX6-NEXT:    s_mov_b32 s11, s5
12608; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12609; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12610; GFX6-NEXT:    s_mov_b32 s10, -1
12611; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12612; GFX6-NEXT:    s_mov_b32 s5, s11
12613; GFX6-NEXT:    s_mov_b32 s6, s10
12614; GFX6-NEXT:    s_mov_b32 s7, s9
12615; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12616; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
12617; GFX6-NEXT:    s_endpgm
12618;
12619; GFX7-LABEL: global_agent_one_as_monotonic_atomicrmw:
12620; GFX7:       ; %bb.0: ; %entry
12621; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
12622; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
12623; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12624; GFX7-NEXT:    v_mov_b32_e32 v0, s6
12625; GFX7-NEXT:    v_mov_b32_e32 v1, s7
12626; GFX7-NEXT:    v_mov_b32_e32 v2, s4
12627; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
12628; GFX7-NEXT:    s_endpgm
12629;
12630; GFX10-WGP-LABEL: global_agent_one_as_monotonic_atomicrmw:
12631; GFX10-WGP:       ; %bb.0: ; %entry
12632; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12633; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12634; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
12635; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12636; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12637; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
12638; GFX10-WGP-NEXT:    s_endpgm
12639;
12640; GFX10-CU-LABEL: global_agent_one_as_monotonic_atomicrmw:
12641; GFX10-CU:       ; %bb.0: ; %entry
12642; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12643; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12644; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
12645; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12646; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12647; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
12648; GFX10-CU-NEXT:    s_endpgm
12649;
12650; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_atomicrmw:
12651; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12652; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12653; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
12654; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12655; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12656; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12657; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12658; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12659; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12660; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12661; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12662; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12663; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12664; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
12665; SKIP-CACHE-INV-NEXT:    s_endpgm
12666;
12667; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw:
12668; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12669; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12670; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12671; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12672; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12673; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12674; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
12675; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12676;
12677; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw:
12678; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12679; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12680; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12681; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12682; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12683; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12684; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
12685; GFX90A-TGSPLIT-NEXT:    s_endpgm
12686;
12687; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw:
12688; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12689; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12690; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12691; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12692; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12693; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12694; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
12695; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12696;
12697; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_atomicrmw:
12698; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12699; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12700; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12701; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12702; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12703; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12704; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
12705; GFX940-TGSPLIT-NEXT:    s_endpgm
12706;
12707; GFX11-WGP-LABEL: global_agent_one_as_monotonic_atomicrmw:
12708; GFX11-WGP:       ; %bb.0: ; %entry
12709; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12710; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12711; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12712; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12713; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12714; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
12715; GFX11-WGP-NEXT:    s_endpgm
12716;
12717; GFX11-CU-LABEL: global_agent_one_as_monotonic_atomicrmw:
12718; GFX11-CU:       ; %bb.0: ; %entry
12719; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12720; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12721; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12722; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12723; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12724; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
12725; GFX11-CU-NEXT:    s_endpgm
12726;
12727; GFX12-WGP-LABEL: global_agent_one_as_monotonic_atomicrmw:
12728; GFX12-WGP:       ; %bb.0: ; %entry
12729; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12730; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12731; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12732; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12733; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12734; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12735; GFX12-WGP-NEXT:    s_endpgm
12736;
12737; GFX12-CU-LABEL: global_agent_one_as_monotonic_atomicrmw:
12738; GFX12-CU:       ; %bb.0: ; %entry
12739; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12740; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12741; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12742; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12743; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12744; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12745; GFX12-CU-NEXT:    s_endpgm
12746    ptr addrspace(1) %out, i32 %in) {
12747entry:
12748  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") monotonic
12749  ret void
12750}
12751
12752define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw(
12753; GFX6-LABEL: global_agent_one_as_acquire_atomicrmw:
12754; GFX6:       ; %bb.0: ; %entry
12755; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12756; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
12757; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12758; GFX6-NEXT:    s_mov_b32 s11, s5
12759; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12760; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12761; GFX6-NEXT:    s_mov_b32 s10, -1
12762; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12763; GFX6-NEXT:    s_mov_b32 s5, s11
12764; GFX6-NEXT:    s_mov_b32 s6, s10
12765; GFX6-NEXT:    s_mov_b32 s7, s9
12766; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12767; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
12768; GFX6-NEXT:    s_waitcnt vmcnt(0)
12769; GFX6-NEXT:    buffer_wbinvl1
12770; GFX6-NEXT:    s_endpgm
12771;
12772; GFX7-LABEL: global_agent_one_as_acquire_atomicrmw:
12773; GFX7:       ; %bb.0: ; %entry
12774; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
12775; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
12776; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12777; GFX7-NEXT:    v_mov_b32_e32 v0, s6
12778; GFX7-NEXT:    v_mov_b32_e32 v1, s7
12779; GFX7-NEXT:    v_mov_b32_e32 v2, s4
12780; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
12781; GFX7-NEXT:    s_waitcnt vmcnt(0)
12782; GFX7-NEXT:    buffer_wbinvl1_vol
12783; GFX7-NEXT:    s_endpgm
12784;
12785; GFX10-WGP-LABEL: global_agent_one_as_acquire_atomicrmw:
12786; GFX10-WGP:       ; %bb.0: ; %entry
12787; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12788; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12789; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
12790; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12791; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12792; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
12793; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12794; GFX10-WGP-NEXT:    buffer_gl1_inv
12795; GFX10-WGP-NEXT:    buffer_gl0_inv
12796; GFX10-WGP-NEXT:    s_endpgm
12797;
12798; GFX10-CU-LABEL: global_agent_one_as_acquire_atomicrmw:
12799; GFX10-CU:       ; %bb.0: ; %entry
12800; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12801; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12802; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
12803; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12804; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12805; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
12806; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12807; GFX10-CU-NEXT:    buffer_gl1_inv
12808; GFX10-CU-NEXT:    buffer_gl0_inv
12809; GFX10-CU-NEXT:    s_endpgm
12810;
12811; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_atomicrmw:
12812; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12813; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12814; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
12815; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12816; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12817; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12818; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12819; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12820; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12821; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12822; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12823; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12824; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12825; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
12826; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12827; SKIP-CACHE-INV-NEXT:    s_endpgm
12828;
12829; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw:
12830; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12831; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12832; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12833; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12834; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12835; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12836; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
12837; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12838; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
12839; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12840;
12841; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw:
12842; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12843; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12844; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12845; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12846; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12847; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12848; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
12849; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12850; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
12851; GFX90A-TGSPLIT-NEXT:    s_endpgm
12852;
12853; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw:
12854; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12855; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12856; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12857; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12858; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12859; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12860; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
12861; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12862; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
12863; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12864;
12865; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_atomicrmw:
12866; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12867; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12868; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12869; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12870; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12871; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12872; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
12873; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12874; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
12875; GFX940-TGSPLIT-NEXT:    s_endpgm
12876;
12877; GFX11-WGP-LABEL: global_agent_one_as_acquire_atomicrmw:
12878; GFX11-WGP:       ; %bb.0: ; %entry
12879; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12880; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12881; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12882; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12883; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12884; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
12885; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12886; GFX11-WGP-NEXT:    buffer_gl1_inv
12887; GFX11-WGP-NEXT:    buffer_gl0_inv
12888; GFX11-WGP-NEXT:    s_endpgm
12889;
12890; GFX11-CU-LABEL: global_agent_one_as_acquire_atomicrmw:
12891; GFX11-CU:       ; %bb.0: ; %entry
12892; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12893; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12894; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12895; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12896; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12897; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
12898; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12899; GFX11-CU-NEXT:    buffer_gl1_inv
12900; GFX11-CU-NEXT:    buffer_gl0_inv
12901; GFX11-CU-NEXT:    s_endpgm
12902;
12903; GFX12-WGP-LABEL: global_agent_one_as_acquire_atomicrmw:
12904; GFX12-WGP:       ; %bb.0: ; %entry
12905; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12906; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12907; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12908; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12909; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12910; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12911; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12912; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
12913; GFX12-WGP-NEXT:    s_endpgm
12914;
12915; GFX12-CU-LABEL: global_agent_one_as_acquire_atomicrmw:
12916; GFX12-CU:       ; %bb.0: ; %entry
12917; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12918; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12919; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12920; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12921; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12922; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
12923; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12924; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
12925; GFX12-CU-NEXT:    s_endpgm
12926    ptr addrspace(1) %out, i32 %in) {
12927entry:
12928  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire
12929  ret void
12930}
12931
; Kernel under test: a volatile atomicrmw xchg on a global (addrspace(1))
; pointer with syncscope("agent-one-as") and release ordering.
;
; The assertion lines inside this definition are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the checks expect for release ordering:
;   - a wait-counter instruction immediately before the swap in every run:
;     s_waitcnt vmcnt(0) on the pre-GFX12 runs, followed by
;     s_waitcnt_vscnt null, 0x0 on the GFX10 and GFX11 runs, and the
;     s_wait_bvhcnt / s_wait_samplecnt / s_wait_loadcnt / s_wait_storecnt
;     group on the GFX12 runs;
;   - on the GFX940 runs, buffer_wbl2 sc1 ahead of that wait;
;   - on the GFX12 runs, scope:SCOPE_DEV on the swap itself;
;   - nothing but s_endpgm after the swap (no cache invalidate in any run).
12932define amdgpu_kernel void @global_agent_one_as_release_atomicrmw(
12933; GFX6-LABEL: global_agent_one_as_release_atomicrmw:
12934; GFX6:       ; %bb.0: ; %entry
12935; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12936; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
12937; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12938; GFX6-NEXT:    s_mov_b32 s11, s5
12939; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12940; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12941; GFX6-NEXT:    s_mov_b32 s10, -1
12942; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12943; GFX6-NEXT:    s_mov_b32 s5, s11
12944; GFX6-NEXT:    s_mov_b32 s6, s10
12945; GFX6-NEXT:    s_mov_b32 s7, s9
12946; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12947; GFX6-NEXT:    s_waitcnt vmcnt(0)
12948; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
12949; GFX6-NEXT:    s_endpgm
12950;
12951; GFX7-LABEL: global_agent_one_as_release_atomicrmw:
12952; GFX7:       ; %bb.0: ; %entry
12953; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
12954; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
12955; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12956; GFX7-NEXT:    v_mov_b32_e32 v0, s6
12957; GFX7-NEXT:    v_mov_b32_e32 v1, s7
12958; GFX7-NEXT:    v_mov_b32_e32 v2, s4
12959; GFX7-NEXT:    s_waitcnt vmcnt(0)
12960; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
12961; GFX7-NEXT:    s_endpgm
12962;
12963; GFX10-WGP-LABEL: global_agent_one_as_release_atomicrmw:
12964; GFX10-WGP:       ; %bb.0: ; %entry
12965; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12966; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12967; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
12968; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12969; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12970; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12971; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12972; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
12973; GFX10-WGP-NEXT:    s_endpgm
12974;
12975; GFX10-CU-LABEL: global_agent_one_as_release_atomicrmw:
12976; GFX10-CU:       ; %bb.0: ; %entry
12977; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12978; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12979; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
12980; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12981; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12982; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12983; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12984; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
12985; GFX10-CU-NEXT:    s_endpgm
12986;
12987; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_atomicrmw:
12988; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12989; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12990; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
12991; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12992; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12993; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12994; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12995; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12996; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12997; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12998; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12999; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
13000; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
13001; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13002; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
13003; SKIP-CACHE-INV-NEXT:    s_endpgm
13004;
13005; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_atomicrmw:
13006; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13007; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13008; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13009; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13010; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13011; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13012; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13013; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
13014; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13015;
13016; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_atomicrmw:
13017; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13018; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13019; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13020; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13021; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13022; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13023; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13024; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
13025; GFX90A-TGSPLIT-NEXT:    s_endpgm
13026;
13027; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_atomicrmw:
13028; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13029; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13030; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13031; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13032; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13033; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13034; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
13035; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13036; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
13037; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13038;
13039; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_atomicrmw:
13040; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13041; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13042; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13043; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13044; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13045; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13046; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
13047; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13048; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
13049; GFX940-TGSPLIT-NEXT:    s_endpgm
13050;
13051; GFX11-WGP-LABEL: global_agent_one_as_release_atomicrmw:
13052; GFX11-WGP:       ; %bb.0: ; %entry
13053; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13054; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13055; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13056; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13057; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
13058; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
13059; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13060; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
13061; GFX11-WGP-NEXT:    s_endpgm
13062;
13063; GFX11-CU-LABEL: global_agent_one_as_release_atomicrmw:
13064; GFX11-CU:       ; %bb.0: ; %entry
13065; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13066; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13067; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13068; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13069; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
13070; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
13071; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13072; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
13073; GFX11-CU-NEXT:    s_endpgm
13074;
13075; GFX12-WGP-LABEL: global_agent_one_as_release_atomicrmw:
13076; GFX12-WGP:       ; %bb.0: ; %entry
13077; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13078; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13079; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13080; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13081; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
13082; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
13083; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
13084; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
13085; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13086; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
13087; GFX12-WGP-NEXT:    s_endpgm
13088;
13089; GFX12-CU-LABEL: global_agent_one_as_release_atomicrmw:
13090; GFX12-CU:       ; %bb.0: ; %entry
13091; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13092; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13093; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13094; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13095; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
13096; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
13097; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
13098; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13099; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13100; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
13101; GFX12-CU-NEXT:    s_endpgm
13102    ptr addrspace(1) %out, i32 %in) {
13103entry:
  ; %val has no users in this function.
13104  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") release
13105  ret void
13106}
13107
; Kernel under test: a volatile atomicrmw xchg on a global (addrspace(1))
; pointer with syncscope("agent-one-as") and acq_rel ordering.
;
; The assertion lines inside this definition are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the checks expect for acq_rel ordering:
;   Before the swap
;     - s_waitcnt vmcnt(0) on the pre-GFX12 runs, followed by
;       s_waitcnt_vscnt null, 0x0 on the GFX10 and GFX11 runs;
;     - the s_wait_bvhcnt / s_wait_samplecnt / s_wait_loadcnt /
;       s_wait_storecnt group on the GFX12 runs;
;     - buffer_wbl2 sc1 ahead of the wait on the GFX940 runs.
;   After the swap
;     - a second wait: s_waitcnt vmcnt(0) on the GFX6, GFX7, SKIP-CACHE-INV,
;       GFX90A and GFX940 runs, s_waitcnt_vscnt null, 0x0 on the GFX10 and
;       GFX11 runs, s_wait_storecnt 0x0 on the GFX12 runs;
;     - then a cache invalidate: buffer_wbinvl1 (GFX6), buffer_wbinvl1_vol
;       (GFX7, GFX90A), buffer_gl1_inv + buffer_gl0_inv (GFX10, GFX11),
;       buffer_inv sc1 (GFX940), global_inv scope:SCOPE_DEV (GFX12);
;     - the SKIP-CACHE-INV run (llc invoked with
;       -amdgcn-skip-cache-invalidations) expects the wait but no invalidate.
13108define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw(
13109; GFX6-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13110; GFX6:       ; %bb.0: ; %entry
13111; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13112; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
13113; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13114; GFX6-NEXT:    s_mov_b32 s11, s5
13115; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13116; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
13117; GFX6-NEXT:    s_mov_b32 s10, -1
13118; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13119; GFX6-NEXT:    s_mov_b32 s5, s11
13120; GFX6-NEXT:    s_mov_b32 s6, s10
13121; GFX6-NEXT:    s_mov_b32 s7, s9
13122; GFX6-NEXT:    v_mov_b32_e32 v0, s8
13123; GFX6-NEXT:    s_waitcnt vmcnt(0)
13124; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
13125; GFX6-NEXT:    s_waitcnt vmcnt(0)
13126; GFX6-NEXT:    buffer_wbinvl1
13127; GFX6-NEXT:    s_endpgm
13128;
13129; GFX7-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13130; GFX7:       ; %bb.0: ; %entry
13131; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
13132; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
13133; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13134; GFX7-NEXT:    v_mov_b32_e32 v0, s6
13135; GFX7-NEXT:    v_mov_b32_e32 v1, s7
13136; GFX7-NEXT:    v_mov_b32_e32 v2, s4
13137; GFX7-NEXT:    s_waitcnt vmcnt(0)
13138; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
13139; GFX7-NEXT:    s_waitcnt vmcnt(0)
13140; GFX7-NEXT:    buffer_wbinvl1_vol
13141; GFX7-NEXT:    s_endpgm
13142;
13143; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13144; GFX10-WGP:       ; %bb.0: ; %entry
13145; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13146; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13147; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
13148; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13149; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
13150; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13151; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13152; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
13153; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13154; GFX10-WGP-NEXT:    buffer_gl1_inv
13155; GFX10-WGP-NEXT:    buffer_gl0_inv
13156; GFX10-WGP-NEXT:    s_endpgm
13157;
13158; GFX10-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13159; GFX10-CU:       ; %bb.0: ; %entry
13160; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13161; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13162; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
13163; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
13164; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
13165; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
13166; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13167; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
13168; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13169; GFX10-CU-NEXT:    buffer_gl1_inv
13170; GFX10-CU-NEXT:    buffer_gl0_inv
13171; GFX10-CU-NEXT:    s_endpgm
13172;
13173; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13174; SKIP-CACHE-INV:       ; %bb.0: ; %entry
13175; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13176; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
13177; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
13178; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
13179; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
13180; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
13181; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
13182; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
13183; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
13184; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
13185; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
13186; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
13187; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13188; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
13189; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13190; SKIP-CACHE-INV-NEXT:    s_endpgm
13191;
13192; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13193; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13194; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13195; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13196; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13197; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13198; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13199; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13200; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
13201; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13202; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
13203; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13204;
13205; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13206; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13207; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13208; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13209; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13210; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13211; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13212; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13213; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
13214; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13215; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
13216; GFX90A-TGSPLIT-NEXT:    s_endpgm
13217;
13218; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13219; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13220; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13221; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13222; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13223; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13224; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13225; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
13226; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13227; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
13228; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13229; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
13230; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13231;
13232; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13233; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13234; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13235; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13236; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13237; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13238; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13239; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
13240; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13241; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
13242; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13243; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
13244; GFX940-TGSPLIT-NEXT:    s_endpgm
13245;
13246; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13247; GFX11-WGP:       ; %bb.0: ; %entry
13248; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13249; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13250; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13251; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13252; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
13253; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
13254; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13255; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
13256; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13257; GFX11-WGP-NEXT:    buffer_gl1_inv
13258; GFX11-WGP-NEXT:    buffer_gl0_inv
13259; GFX11-WGP-NEXT:    s_endpgm
13260;
13261; GFX11-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13262; GFX11-CU:       ; %bb.0: ; %entry
13263; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13264; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13265; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13266; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13267; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
13268; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
13269; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13270; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
13271; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13272; GFX11-CU-NEXT:    buffer_gl1_inv
13273; GFX11-CU-NEXT:    buffer_gl0_inv
13274; GFX11-CU-NEXT:    s_endpgm
13275;
13276; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13277; GFX12-WGP:       ; %bb.0: ; %entry
13278; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13279; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13280; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13281; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13282; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
13283; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
13284; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
13285; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
13286; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13287; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
13288; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13289; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
13290; GFX12-WGP-NEXT:    s_endpgm
13291;
13292; GFX12-CU-LABEL: global_agent_one_as_acq_rel_atomicrmw:
13293; GFX12-CU:       ; %bb.0: ; %entry
13294; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13295; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13296; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13297; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13298; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
13299; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
13300; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
13301; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13302; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13303; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
13304; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13305; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
13306; GFX12-CU-NEXT:    s_endpgm
13307    ptr addrspace(1) %out, i32 %in) {
13308entry:
  ; %val has no users in this function.
13309  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel
13310  ret void
13311}
13312
; Kernel under test: a volatile atomicrmw xchg on a global (addrspace(1))
; pointer with syncscope("agent-one-as") and seq_cst ordering.
;
; The assertion lines inside this definition are autogenerated (see the note on
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
;
; What the checks expect for seq_cst ordering (in this revision the expected
; instruction sequences are the same as those recorded for
; @global_agent_one_as_acq_rel_atomicrmw):
;   Before the swap
;     - s_waitcnt vmcnt(0) on the pre-GFX12 runs, followed by
;       s_waitcnt_vscnt null, 0x0 on the GFX10 and GFX11 runs;
;     - the s_wait_bvhcnt / s_wait_samplecnt / s_wait_loadcnt /
;       s_wait_storecnt group on the GFX12 runs;
;     - buffer_wbl2 sc1 ahead of the wait on the GFX940 runs.
;   After the swap
;     - a second wait: s_waitcnt vmcnt(0) on the GFX6, GFX7, SKIP-CACHE-INV,
;       GFX90A and GFX940 runs, s_waitcnt_vscnt null, 0x0 on the GFX10 and
;       GFX11 runs, s_wait_storecnt 0x0 on the GFX12 runs;
;     - then a cache invalidate: buffer_wbinvl1 (GFX6), buffer_wbinvl1_vol
;       (GFX7, GFX90A), buffer_gl1_inv + buffer_gl0_inv (GFX10, GFX11),
;       buffer_inv sc1 (GFX940), global_inv scope:SCOPE_DEV (GFX12);
;     - the SKIP-CACHE-INV run (llc invoked with
;       -amdgcn-skip-cache-invalidations) expects the wait but no invalidate.
13313define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw(
13314; GFX6-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13315; GFX6:       ; %bb.0: ; %entry
13316; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13317; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
13318; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13319; GFX6-NEXT:    s_mov_b32 s11, s5
13320; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13321; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
13322; GFX6-NEXT:    s_mov_b32 s10, -1
13323; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13324; GFX6-NEXT:    s_mov_b32 s5, s11
13325; GFX6-NEXT:    s_mov_b32 s6, s10
13326; GFX6-NEXT:    s_mov_b32 s7, s9
13327; GFX6-NEXT:    v_mov_b32_e32 v0, s8
13328; GFX6-NEXT:    s_waitcnt vmcnt(0)
13329; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
13330; GFX6-NEXT:    s_waitcnt vmcnt(0)
13331; GFX6-NEXT:    buffer_wbinvl1
13332; GFX6-NEXT:    s_endpgm
13333;
13334; GFX7-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13335; GFX7:       ; %bb.0: ; %entry
13336; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
13337; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
13338; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13339; GFX7-NEXT:    v_mov_b32_e32 v0, s6
13340; GFX7-NEXT:    v_mov_b32_e32 v1, s7
13341; GFX7-NEXT:    v_mov_b32_e32 v2, s4
13342; GFX7-NEXT:    s_waitcnt vmcnt(0)
13343; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
13344; GFX7-NEXT:    s_waitcnt vmcnt(0)
13345; GFX7-NEXT:    buffer_wbinvl1_vol
13346; GFX7-NEXT:    s_endpgm
13347;
13348; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13349; GFX10-WGP:       ; %bb.0: ; %entry
13350; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13351; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13352; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
13353; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13354; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
13355; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13356; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13357; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
13358; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13359; GFX10-WGP-NEXT:    buffer_gl1_inv
13360; GFX10-WGP-NEXT:    buffer_gl0_inv
13361; GFX10-WGP-NEXT:    s_endpgm
13362;
13363; GFX10-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13364; GFX10-CU:       ; %bb.0: ; %entry
13365; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13366; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13367; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
13368; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
13369; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
13370; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
13371; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13372; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
13373; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13374; GFX10-CU-NEXT:    buffer_gl1_inv
13375; GFX10-CU-NEXT:    buffer_gl0_inv
13376; GFX10-CU-NEXT:    s_endpgm
13377;
13378; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13379; SKIP-CACHE-INV:       ; %bb.0: ; %entry
13380; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13381; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
13382; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
13383; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
13384; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
13385; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
13386; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
13387; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
13388; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
13389; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
13390; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
13391; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
13392; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13393; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
13394; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13395; SKIP-CACHE-INV-NEXT:    s_endpgm
13396;
13397; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13398; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13399; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13400; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13401; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13402; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13403; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13404; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13405; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
13406; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13407; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
13408; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13409;
13410; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13411; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13412; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13413; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13414; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13415; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13416; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13417; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13418; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
13419; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13420; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
13421; GFX90A-TGSPLIT-NEXT:    s_endpgm
13422;
13423; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13424; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13425; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13426; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13427; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13428; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13429; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13430; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
13431; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13432; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
13433; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13434; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
13435; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13436;
13437; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13438; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13439; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13440; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13441; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13442; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13443; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13444; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
13445; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13446; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1]
13447; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13448; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
13449; GFX940-TGSPLIT-NEXT:    s_endpgm
13450;
13451; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13452; GFX11-WGP:       ; %bb.0: ; %entry
13453; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13454; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13455; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13456; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13457; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
13458; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
13459; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13460; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
13461; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13462; GFX11-WGP-NEXT:    buffer_gl1_inv
13463; GFX11-WGP-NEXT:    buffer_gl0_inv
13464; GFX11-WGP-NEXT:    s_endpgm
13465;
13466; GFX11-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13467; GFX11-CU:       ; %bb.0: ; %entry
13468; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13469; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13470; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13471; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13472; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
13473; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
13474; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13475; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
13476; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13477; GFX11-CU-NEXT:    buffer_gl1_inv
13478; GFX11-CU-NEXT:    buffer_gl0_inv
13479; GFX11-CU-NEXT:    s_endpgm
13480;
13481; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13482; GFX12-WGP:       ; %bb.0: ; %entry
13483; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13484; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13485; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13486; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13487; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
13488; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
13489; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
13490; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
13491; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13492; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
13493; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13494; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
13495; GFX12-WGP-NEXT:    s_endpgm
13496;
13497; GFX12-CU-LABEL: global_agent_one_as_seq_cst_atomicrmw:
13498; GFX12-CU:       ; %bb.0: ; %entry
13499; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13500; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13501; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13502; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13503; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
13504; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
13505; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
13506; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13507; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13508; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_DEV
13509; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13510; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
13511; GFX12-CU-NEXT:    s_endpgm
13512    ptr addrspace(1) %out, i32 %in) {
13513entry:
  ; %val has no users in this function.
13514  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst
13515  ret void
13516}
13517
; Returning atomic exchange on a global (addrspace(1)) pointer using
; syncscope("agent-one-as") with acquire ordering. The value returned by the
; atomicrmw is stored back through %out.
;
; What the assertions below pin down, per run configuration:
;   - the atomic is emitted in its returning form (glc, sc0 or
;     th:TH_ATOMIC_RETURN depending on the target);
;   - a vmcnt / loadcnt wait follows the atomic;
;   - a buffer_wbinvl1, buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv,
;     buffer_inv sc1 or global_inv sits between that wait and the store,
;     except in the run that passes -amdgcn-skip-cache-invalidations, which
;     expects none of them;
;   - no vmcnt / vscnt / storecnt wait and no buffer_wbl2 is expected ahead of
;     the atomic (only the wait for the kernel-argument loads).
13518define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw(
13519; GFX6-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13520; GFX6:       ; %bb.0: ; %entry
13521; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13522; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
13523; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13524; GFX6-NEXT:    s_mov_b32 s11, s5
13525; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13526; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
13527; GFX6-NEXT:    s_mov_b32 s10, -1
13528; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13529; GFX6-NEXT:    s_mov_b32 s5, s11
13530; GFX6-NEXT:    s_mov_b32 s6, s10
13531; GFX6-NEXT:    s_mov_b32 s7, s9
13532; GFX6-NEXT:    v_mov_b32_e32 v0, s8
13533; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
13534; GFX6-NEXT:    s_waitcnt vmcnt(0)
13535; GFX6-NEXT:    buffer_wbinvl1
13536; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
13537; GFX6-NEXT:    s_endpgm
13538;
13539; GFX7-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13540; GFX7:       ; %bb.0: ; %entry
13541; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13542; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
13543; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13544; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13545; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13546; GFX7-NEXT:    v_mov_b32_e32 v2, s6
13547; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
13548; GFX7-NEXT:    s_waitcnt vmcnt(0)
13549; GFX7-NEXT:    buffer_wbinvl1_vol
13550; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13551; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13552; GFX7-NEXT:    flat_store_dword v[0:1], v2
13553; GFX7-NEXT:    s_endpgm
13554;
13555; GFX10-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13556; GFX10-WGP:       ; %bb.0: ; %entry
13557; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13558; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13559; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
13560; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13561; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
13562; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13563; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13564; GFX10-WGP-NEXT:    buffer_gl1_inv
13565; GFX10-WGP-NEXT:    buffer_gl0_inv
13566; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
13567; GFX10-WGP-NEXT:    s_endpgm
13568;
13569; GFX10-CU-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13570; GFX10-CU:       ; %bb.0: ; %entry
13571; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13572; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13573; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
13574; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
13575; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
13576; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13577; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
13578; GFX10-CU-NEXT:    buffer_gl1_inv
13579; GFX10-CU-NEXT:    buffer_gl0_inv
13580; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
13581; GFX10-CU-NEXT:    s_endpgm
13582;
13583; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13584; SKIP-CACHE-INV:       ; %bb.0: ; %entry
13585; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13586; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
13587; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
13588; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
13589; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
13590; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
13591; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
13592; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
13593; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
13594; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
13595; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
13596; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
13597; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
13598; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13599; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
13600; SKIP-CACHE-INV-NEXT:    s_endpgm
13601;
13602; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13603; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13604; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13605; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13606; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13607; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13608; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13609; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13610; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13611; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
13612; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
13613; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13614;
13615; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13616; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13617; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13618; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13619; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13620; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13621; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13622; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13623; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13624; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
13625; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
13626; GFX90A-TGSPLIT-NEXT:    s_endpgm
13627;
13628; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13629; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13630; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13631; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13632; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13633; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13634; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13635; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
13636; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13637; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
13638; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
13639; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13640;
13641; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13642; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13643; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13644; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13645; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13646; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13647; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13648; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
13649; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13650; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
13651; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
13652; GFX940-TGSPLIT-NEXT:    s_endpgm
13653;
13654; GFX11-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13655; GFX11-WGP:       ; %bb.0: ; %entry
13656; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13657; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13658; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13659; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13660; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
13661; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
13662; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
13663; GFX11-WGP-NEXT:    buffer_gl1_inv
13664; GFX11-WGP-NEXT:    buffer_gl0_inv
13665; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
13666; GFX11-WGP-NEXT:    s_endpgm
13667;
13668; GFX11-CU-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13669; GFX11-CU:       ; %bb.0: ; %entry
13670; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13671; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13672; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13673; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13674; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
13675; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
13676; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
13677; GFX11-CU-NEXT:    buffer_gl1_inv
13678; GFX11-CU-NEXT:    buffer_gl0_inv
13679; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
13680; GFX11-CU-NEXT:    s_endpgm
13681;
13682; GFX12-WGP-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13683; GFX12-WGP:       ; %bb.0: ; %entry
13684; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13685; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13686; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13687; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13688; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
13689; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
13690; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
13691; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
13692; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
13693; GFX12-WGP-NEXT:    s_endpgm
13694;
13695; GFX12-CU-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
13696; GFX12-CU:       ; %bb.0: ; %entry
13697; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13698; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13699; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13700; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13701; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
13702; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
13703; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13704; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
13705; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
13706; GFX12-CU-NEXT:    s_endpgm
13707    ptr addrspace(1) %out, i32 %in) {
13708entry:
13709  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acquire
; Write the value returned by the exchange back to %out.
13710  store i32 %val, ptr addrspace(1) %out, align 4
13711  ret void
13712}
13713
; Returning atomic exchange on a global (addrspace(1)) pointer using
; syncscope("agent-one-as") with acq_rel ordering. The value returned by the
; atomicrmw is stored back through %out.
;
; What the assertions below pin down, per run configuration:
;   - before the atomic: an s_waitcnt vmcnt(0), joined by s_waitcnt_vscnt in
;     the gfx1010 / gfx1100 runs, preceded by buffer_wbl2 sc1 in the gfx940
;     runs, and replaced by s_wait_bvhcnt / s_wait_samplecnt / s_wait_loadcnt /
;     s_wait_storecnt in the gfx1200 runs;
;   - the atomic in its returning form (glc, sc0 or th:TH_ATOMIC_RETURN);
;   - after the atomic: a vmcnt wait (bvhcnt / samplecnt / loadcnt waits on
;     gfx1200) followed by buffer_wbinvl1, buffer_wbinvl1_vol,
;     buffer_gl1_inv + buffer_gl0_inv, buffer_inv sc1 or global_inv, except in
;     the run that passes -amdgcn-skip-cache-invalidations, which expects only
;     the wait.
13714define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw(
13715; GFX6-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13716; GFX6:       ; %bb.0: ; %entry
13717; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13718; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
13719; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13720; GFX6-NEXT:    s_mov_b32 s11, s5
13721; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13722; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
13723; GFX6-NEXT:    s_mov_b32 s10, -1
13724; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13725; GFX6-NEXT:    s_mov_b32 s5, s11
13726; GFX6-NEXT:    s_mov_b32 s6, s10
13727; GFX6-NEXT:    s_mov_b32 s7, s9
13728; GFX6-NEXT:    v_mov_b32_e32 v0, s8
13729; GFX6-NEXT:    s_waitcnt vmcnt(0)
13730; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
13731; GFX6-NEXT:    s_waitcnt vmcnt(0)
13732; GFX6-NEXT:    buffer_wbinvl1
13733; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
13734; GFX6-NEXT:    s_endpgm
13735;
13736; GFX7-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13737; GFX7:       ; %bb.0: ; %entry
13738; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13739; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
13740; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13741; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13742; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13743; GFX7-NEXT:    v_mov_b32_e32 v2, s6
13744; GFX7-NEXT:    s_waitcnt vmcnt(0)
13745; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
13746; GFX7-NEXT:    s_waitcnt vmcnt(0)
13747; GFX7-NEXT:    buffer_wbinvl1_vol
13748; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13749; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13750; GFX7-NEXT:    flat_store_dword v[0:1], v2
13751; GFX7-NEXT:    s_endpgm
13752;
13753; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13754; GFX10-WGP:       ; %bb.0: ; %entry
13755; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13756; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13757; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
13758; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13759; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
13760; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13761; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13762; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13763; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13764; GFX10-WGP-NEXT:    buffer_gl1_inv
13765; GFX10-WGP-NEXT:    buffer_gl0_inv
13766; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
13767; GFX10-WGP-NEXT:    s_endpgm
13768;
13769; GFX10-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13770; GFX10-CU:       ; %bb.0: ; %entry
13771; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13772; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13773; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
13774; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
13775; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
13776; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
13777; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13778; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13779; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
13780; GFX10-CU-NEXT:    buffer_gl1_inv
13781; GFX10-CU-NEXT:    buffer_gl0_inv
13782; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
13783; GFX10-CU-NEXT:    s_endpgm
13784;
13785; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13786; SKIP-CACHE-INV:       ; %bb.0: ; %entry
13787; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13788; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
13789; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
13790; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
13791; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
13792; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
13793; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
13794; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
13795; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
13796; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
13797; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
13798; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
13799; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13800; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
13801; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13802; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
13803; SKIP-CACHE-INV-NEXT:    s_endpgm
13804;
13805; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13806; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13807; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13808; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13809; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13810; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13811; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13812; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13813; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13814; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13815; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
13816; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
13817; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13818;
13819; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13820; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13821; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13822; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13823; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
13824; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13825; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13826; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13827; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13828; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13829; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
13830; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
13831; GFX90A-TGSPLIT-NEXT:    s_endpgm
13832;
13833; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13834; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13835; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13836; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13837; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13838; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13839; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13840; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
13841; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13842; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
13843; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13844; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
13845; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
13846; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13847;
13848; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13849; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13850; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13851; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13852; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
13853; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13854; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13855; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
13856; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13857; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
13858; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13859; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
13860; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
13861; GFX940-TGSPLIT-NEXT:    s_endpgm
13862;
13863; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13864; GFX11-WGP:       ; %bb.0: ; %entry
13865; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13866; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13867; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13868; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13869; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
13870; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
13871; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13872; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
13873; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
13874; GFX11-WGP-NEXT:    buffer_gl1_inv
13875; GFX11-WGP-NEXT:    buffer_gl0_inv
13876; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
13877; GFX11-WGP-NEXT:    s_endpgm
13878;
13879; GFX11-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13880; GFX11-CU:       ; %bb.0: ; %entry
13881; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13882; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13883; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13884; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13885; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
13886; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
13887; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13888; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
13889; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
13890; GFX11-CU-NEXT:    buffer_gl1_inv
13891; GFX11-CU-NEXT:    buffer_gl0_inv
13892; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
13893; GFX11-CU-NEXT:    s_endpgm
13894;
13895; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13896; GFX12-WGP:       ; %bb.0: ; %entry
13897; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13898; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13899; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
13900; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13901; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
13902; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
13903; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
13904; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
13905; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13906; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
13907; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
13908; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
13909; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
13910; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
13911; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
13912; GFX12-WGP-NEXT:    s_endpgm
13913;
13914; GFX12-CU-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
13915; GFX12-CU:       ; %bb.0: ; %entry
13916; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13917; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13918; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
13919; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13920; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
13921; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
13922; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
13923; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13924; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13925; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
13926; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
13927; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
13928; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13929; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
13930; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
13931; GFX12-CU-NEXT:    s_endpgm
13932    ptr addrspace(1) %out, i32 %in) {
13933entry:
13934  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") acq_rel
; Write the value returned by the exchange back to %out.
13935  store i32 %val, ptr addrspace(1) %out, align 4
13936  ret void
13937}
13938
; Returning atomic exchange on a global (addrspace(1)) pointer using
; syncscope("agent-one-as") with seq_cst ordering. The value returned by the
; atomicrmw is stored back through %out.
;
; What the assertions below pin down, per run configuration:
;   - before the atomic: an s_waitcnt vmcnt(0), joined by s_waitcnt_vscnt in
;     the gfx1010 / gfx1100 runs, preceded by buffer_wbl2 sc1 in the gfx940
;     runs, and replaced by s_wait_bvhcnt / s_wait_samplecnt / s_wait_loadcnt /
;     s_wait_storecnt in the gfx1200 runs;
;   - the atomic in its returning form (glc, sc0 or th:TH_ATOMIC_RETURN);
;   - after the atomic: a vmcnt wait (bvhcnt / samplecnt / loadcnt waits on
;     gfx1200) followed by buffer_wbinvl1, buffer_wbinvl1_vol,
;     buffer_gl1_inv + buffer_gl0_inv, buffer_inv sc1 or global_inv, except in
;     the run that passes -amdgcn-skip-cache-invalidations, which expects only
;     the wait.
; The expected instruction sequences are the same as those asserted for
; @global_agent_one_as_acq_rel_ret_atomicrmw.
13939define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw(
13940; GFX6-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
13941; GFX6:       ; %bb.0: ; %entry
13942; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13943; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
13944; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13945; GFX6-NEXT:    s_mov_b32 s11, s5
13946; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13947; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
13948; GFX6-NEXT:    s_mov_b32 s10, -1
13949; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13950; GFX6-NEXT:    s_mov_b32 s5, s11
13951; GFX6-NEXT:    s_mov_b32 s6, s10
13952; GFX6-NEXT:    s_mov_b32 s7, s9
13953; GFX6-NEXT:    v_mov_b32_e32 v0, s8
13954; GFX6-NEXT:    s_waitcnt vmcnt(0)
13955; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
13956; GFX6-NEXT:    s_waitcnt vmcnt(0)
13957; GFX6-NEXT:    buffer_wbinvl1
13958; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
13959; GFX6-NEXT:    s_endpgm
13960;
13961; GFX7-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
13962; GFX7:       ; %bb.0: ; %entry
13963; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13964; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
13965; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13966; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13967; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13968; GFX7-NEXT:    v_mov_b32_e32 v2, s6
13969; GFX7-NEXT:    s_waitcnt vmcnt(0)
13970; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
13971; GFX7-NEXT:    s_waitcnt vmcnt(0)
13972; GFX7-NEXT:    buffer_wbinvl1_vol
13973; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13974; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13975; GFX7-NEXT:    flat_store_dword v[0:1], v2
13976; GFX7-NEXT:    s_endpgm
13977;
13978; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
13979; GFX10-WGP:       ; %bb.0: ; %entry
13980; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13981; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13982; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
13983; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13984; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
13985; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13986; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13987; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
13988; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13989; GFX10-WGP-NEXT:    buffer_gl1_inv
13990; GFX10-WGP-NEXT:    buffer_gl0_inv
13991; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
13992; GFX10-WGP-NEXT:    s_endpgm
13993;
13994; GFX10-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
13995; GFX10-CU:       ; %bb.0: ; %entry
13996; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13997; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13998; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
13999; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14000; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
14001; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
14002; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14003; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
14004; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
14005; GFX10-CU-NEXT:    buffer_gl1_inv
14006; GFX10-CU-NEXT:    buffer_gl0_inv
14007; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
14008; GFX10-CU-NEXT:    s_endpgm
14009;
14010; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14011; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14012; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14013; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
14014; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14015; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
14016; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14017; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
14018; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
14019; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14020; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
14021; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
14022; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
14023; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
14024; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14025; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
14026; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14027; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
14028; SKIP-CACHE-INV-NEXT:    s_endpgm
14029;
14030; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14031; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14032; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14033; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14034; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
14035; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14036; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14037; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14038; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
14039; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14040; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
14041; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
14042; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14043;
14044; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14045; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
14046; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14047; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14048; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
14049; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14050; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14051; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14052; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
14053; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14054; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
14055; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
14056; GFX90A-TGSPLIT-NEXT:    s_endpgm
14057;
14058; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14059; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
14060; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14061; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14062; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
14063; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14064; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14065; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
14066; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14067; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
14068; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14069; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
14070; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
14071; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
14072;
14073; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14074; GFX940-TGSPLIT:       ; %bb.0: ; %entry
14075; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14076; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14077; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
14078; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14079; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14080; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
14081; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14082; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0
14083; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14084; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
14085; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
14086; GFX940-TGSPLIT-NEXT:    s_endpgm
14087;
14088; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14089; GFX11-WGP:       ; %bb.0: ; %entry
14090; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
14091; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14092; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
14093; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14094; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
14095; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
14096; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14097; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
14098; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
14099; GFX11-WGP-NEXT:    buffer_gl1_inv
14100; GFX11-WGP-NEXT:    buffer_gl0_inv
14101; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
14102; GFX11-WGP-NEXT:    s_endpgm
14103;
14104; GFX11-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14105; GFX11-CU:       ; %bb.0: ; %entry
14106; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
14107; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14108; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
14109; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
14110; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
14111; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
14112; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14113; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
14114; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
14115; GFX11-CU-NEXT:    buffer_gl1_inv
14116; GFX11-CU-NEXT:    buffer_gl0_inv
14117; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
14118; GFX11-CU-NEXT:    s_endpgm
14119;
14120; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14121; GFX12-WGP:       ; %bb.0: ; %entry
14122; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
14123; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14124; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
14125; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
14126; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
14127; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
14128; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
14129; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
14130; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
14131; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
14132; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
14133; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
14134; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
14135; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
14136; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
14137; GFX12-WGP-NEXT:    s_endpgm
14138;
14139; GFX12-CU-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
14140; GFX12-CU:       ; %bb.0: ; %entry
14141; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
14142; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14143; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
14144; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
14145; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
14146; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
14147; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
14148; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
14149; GFX12-CU-NEXT:    s_wait_storecnt 0x0
14150; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
14151; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
14152; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
14153; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
14154; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
14155; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
14156; GFX12-CU-NEXT:    s_endpgm
14157    ptr addrspace(1) %out, i32 %in) {
14158entry:
14159  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("agent-one-as") seq_cst
; Write the value returned by the exchange back to %out.
14160  store i32 %val, ptr addrspace(1) %out, align 4
14161  ret void
14162}
14163
; Test kernel: volatile cmpxchg on a global (addrspace 1) pointer with
; syncscope("agent-one-as") and monotonic success / monotonic failure ordering.
; The target address is %out advanced by four i32 elements, which shows up as
; a 16-byte offset in the expected output of every run line. The cmpxchg
; result is not used. For this ordering the expected output on every target
; has the compare-swap atomic immediately followed by s_endpgm; the only wait
; that precedes it is the one covering the scalar kernel-argument loads. The
; gfx1200 runs additionally carry a SCOPE_DEV scope operand on the atomic.
14164define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg(
14165; GFX6-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14166; GFX6:       ; %bb.0: ; %entry
14167; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
14168; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
14169; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
14170; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
14171; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
14172; GFX6-NEXT:    s_mov_b32 s12, s5
14173; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
14174; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
14175; GFX6-NEXT:    s_mov_b32 s11, -1
14176; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
14177; GFX6-NEXT:    s_mov_b32 s5, s12
14178; GFX6-NEXT:    s_mov_b32 s6, s11
14179; GFX6-NEXT:    s_mov_b32 s7, s10
14180; GFX6-NEXT:    v_mov_b32_e32 v0, s9
14181; GFX6-NEXT:    v_mov_b32_e32 v2, s8
14182; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14183; GFX6-NEXT:    v_mov_b32_e32 v1, v2
14184; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14185; GFX6-NEXT:    s_endpgm
14186;
14187; GFX7-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14188; GFX7:       ; %bb.0: ; %entry
14189; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
14190; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
14191; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
14192; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
14193; GFX7-NEXT:    s_mov_b64 s[10:11], 16
14194; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
14195; GFX7-NEXT:    s_mov_b32 s4, s8
14196; GFX7-NEXT:    s_mov_b32 s5, s9
14197; GFX7-NEXT:    s_mov_b32 s9, s10
14198; GFX7-NEXT:    s_mov_b32 s8, s11
14199; GFX7-NEXT:    s_add_u32 s4, s4, s9
14200; GFX7-NEXT:    s_addc_u32 s8, s5, s8
14201; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
14202; GFX7-NEXT:    s_mov_b32 s5, s8
14203; GFX7-NEXT:    v_mov_b32_e32 v2, s7
14204; GFX7-NEXT:    v_mov_b32_e32 v0, s6
14205; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14206; GFX7-NEXT:    v_mov_b32_e32 v3, v0
14207; GFX7-NEXT:    v_mov_b32_e32 v0, s4
14208; GFX7-NEXT:    v_mov_b32_e32 v1, s5
14209; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
14210; GFX7-NEXT:    s_endpgm
14211;
14212; GFX10-WGP-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14213; GFX10-WGP:       ; %bb.0: ; %entry
14214; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
14215; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14216; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
14217; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
14218; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14219; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
14220; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
14221; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14222; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
14223; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14224; GFX10-WGP-NEXT:    s_endpgm
14225;
14226; GFX10-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14227; GFX10-CU:       ; %bb.0: ; %entry
14228; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
14229; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14230; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
14231; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
14232; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14233; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
14234; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
14235; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14236; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
14237; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14238; GFX10-CU-NEXT:    s_endpgm
14239;
14240; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14241; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14242; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
14243; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
14244; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
14245; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
14246; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14247; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
14248; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14249; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
14250; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
14251; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14252; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
14253; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
14254; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
14255; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
14256; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
14257; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14258; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
14259; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14260; SKIP-CACHE-INV-NEXT:    s_endpgm
14261;
14262; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14263; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14264; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14265; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14266; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14267; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14268; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14269; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14270; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14271; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14272; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14273; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14274; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14275;
14276; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14277; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
14278; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14279; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14280; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14281; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14282; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14283; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14284; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14285; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14286; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14287; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14288; GFX90A-TGSPLIT-NEXT:    s_endpgm
14289;
14290; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14291; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
14292; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14293; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14294; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14295; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14296; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14297; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14298; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14299; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14300; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14301; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14302; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
14303;
14304; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14305; GFX940-TGSPLIT:       ; %bb.0: ; %entry
14306; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14307; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14308; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14309; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14310; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14311; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14312; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14313; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14314; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14315; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14316; GFX940-TGSPLIT-NEXT:    s_endpgm
14317;
14318; GFX11-WGP-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14319; GFX11-WGP:       ; %bb.0: ; %entry
14320; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
14321; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14322; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14323; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14324; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14325; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
14326; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
14327; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14328; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
14329; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14330; GFX11-WGP-NEXT:    s_endpgm
14331;
14332; GFX11-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14333; GFX11-CU:       ; %bb.0: ; %entry
14334; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
14335; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14336; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14337; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14338; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
14339; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
14340; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
14341; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14342; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
14343; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14344; GFX11-CU-NEXT:    s_endpgm
14345;
14346; GFX12-WGP-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14347; GFX12-WGP:       ; %bb.0: ; %entry
14348; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
14349; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14350; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14351; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14352; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
14353; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
14354; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
14355; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14356; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
14357; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
14358; GFX12-WGP-NEXT:    s_endpgm
14359;
14360; GFX12-CU-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
14361; GFX12-CU:       ; %bb.0: ; %entry
14362; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
14363; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14364; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14365; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14366; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
14367; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
14368; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
14369; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14370; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
14371; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
14372; GFX12-CU-NEXT:    s_endpgm
14373    ptr addrspace(1) %out, i32 %in, i32 %old) {
14374entry:
  ; Address of the fifth i32 element of %out (byte offset 16).
14375  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and store %in on a match; the returned pair is unused.
14376  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
14377  ret void
14378}
14379
; Test kernel: volatile cmpxchg on a global (addrspace 1) pointer with
; syncscope("agent-one-as") and acquire success / monotonic failure ordering.
; The target address is %out advanced by four i32 elements (byte offset 16)
; and the cmpxchg result is not used. The expected output places a wait
; followed by a cache invalidate after the atomic on each target:
;   gfx600            - s_waitcnt vmcnt(0), buffer_wbinvl1
;   gfx700, gfx90a    - s_waitcnt vmcnt(0), buffer_wbinvl1_vol
;   gfx940            - s_waitcnt vmcnt(0), buffer_inv sc1
;   gfx1010, gfx1100  - s_waitcnt_vscnt null, 0x0, buffer_gl1_inv, buffer_gl0_inv
;   gfx1200           - s_wait_storecnt 0x0, global_inv scope:SCOPE_DEV
; The run with -amdgcn-skip-cache-invalidations keeps only the
; s_waitcnt vmcnt(0) and has no invalidate instruction.
14380define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg(
14381; GFX6-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14382; GFX6:       ; %bb.0: ; %entry
14383; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
14384; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
14385; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
14386; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
14387; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
14388; GFX6-NEXT:    s_mov_b32 s12, s5
14389; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
14390; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
14391; GFX6-NEXT:    s_mov_b32 s11, -1
14392; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
14393; GFX6-NEXT:    s_mov_b32 s5, s12
14394; GFX6-NEXT:    s_mov_b32 s6, s11
14395; GFX6-NEXT:    s_mov_b32 s7, s10
14396; GFX6-NEXT:    v_mov_b32_e32 v0, s9
14397; GFX6-NEXT:    v_mov_b32_e32 v2, s8
14398; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14399; GFX6-NEXT:    v_mov_b32_e32 v1, v2
14400; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14401; GFX6-NEXT:    s_waitcnt vmcnt(0)
14402; GFX6-NEXT:    buffer_wbinvl1
14403; GFX6-NEXT:    s_endpgm
14404;
14405; GFX7-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14406; GFX7:       ; %bb.0: ; %entry
14407; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
14408; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
14409; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
14410; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
14411; GFX7-NEXT:    s_mov_b64 s[10:11], 16
14412; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
14413; GFX7-NEXT:    s_mov_b32 s4, s8
14414; GFX7-NEXT:    s_mov_b32 s5, s9
14415; GFX7-NEXT:    s_mov_b32 s9, s10
14416; GFX7-NEXT:    s_mov_b32 s8, s11
14417; GFX7-NEXT:    s_add_u32 s4, s4, s9
14418; GFX7-NEXT:    s_addc_u32 s8, s5, s8
14419; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
14420; GFX7-NEXT:    s_mov_b32 s5, s8
14421; GFX7-NEXT:    v_mov_b32_e32 v2, s7
14422; GFX7-NEXT:    v_mov_b32_e32 v0, s6
14423; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14424; GFX7-NEXT:    v_mov_b32_e32 v3, v0
14425; GFX7-NEXT:    v_mov_b32_e32 v0, s4
14426; GFX7-NEXT:    v_mov_b32_e32 v1, s5
14427; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
14428; GFX7-NEXT:    s_waitcnt vmcnt(0)
14429; GFX7-NEXT:    buffer_wbinvl1_vol
14430; GFX7-NEXT:    s_endpgm
14431;
14432; GFX10-WGP-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14433; GFX10-WGP:       ; %bb.0: ; %entry
14434; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
14435; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14436; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
14437; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
14438; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14439; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
14440; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
14441; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14442; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
14443; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14444; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14445; GFX10-WGP-NEXT:    buffer_gl1_inv
14446; GFX10-WGP-NEXT:    buffer_gl0_inv
14447; GFX10-WGP-NEXT:    s_endpgm
14448;
14449; GFX10-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14450; GFX10-CU:       ; %bb.0: ; %entry
14451; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
14452; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14453; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
14454; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
14455; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14456; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
14457; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
14458; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14459; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
14460; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14461; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14462; GFX10-CU-NEXT:    buffer_gl1_inv
14463; GFX10-CU-NEXT:    buffer_gl0_inv
14464; GFX10-CU-NEXT:    s_endpgm
14465;
14466; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14467; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14468; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
14469; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
14470; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
14471; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
14472; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14473; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
14474; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14475; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
14476; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
14477; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14478; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
14479; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
14480; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
14481; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
14482; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
14483; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14484; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
14485; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14486; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14487; SKIP-CACHE-INV-NEXT:    s_endpgm
14488;
14489; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14490; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14491; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14492; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14493; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14494; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14495; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14496; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14497; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14498; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14499; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14500; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14501; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14502; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
14503; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14504;
14505; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14506; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
14507; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14508; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14509; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14510; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14511; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14512; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14513; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14514; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14515; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14516; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14517; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14518; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
14519; GFX90A-TGSPLIT-NEXT:    s_endpgm
14520;
14521; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14522; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
14523; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14524; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14525; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14526; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14527; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14528; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14529; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14530; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14531; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14532; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14533; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14534; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
14535; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
14536;
14537; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14538; GFX940-TGSPLIT:       ; %bb.0: ; %entry
14539; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14540; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14541; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14542; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14543; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14544; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14545; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14546; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14547; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14548; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14549; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14550; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
14551; GFX940-TGSPLIT-NEXT:    s_endpgm
14552;
14553; GFX11-WGP-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14554; GFX11-WGP:       ; %bb.0: ; %entry
14555; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
14556; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14557; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14558; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14559; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14560; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
14561; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
14562; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14563; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
14564; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14565; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14566; GFX11-WGP-NEXT:    buffer_gl1_inv
14567; GFX11-WGP-NEXT:    buffer_gl0_inv
14568; GFX11-WGP-NEXT:    s_endpgm
14569;
14570; GFX11-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14571; GFX11-CU:       ; %bb.0: ; %entry
14572; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
14573; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14574; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14575; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14576; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
14577; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
14578; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
14579; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14580; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
14581; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14582; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14583; GFX11-CU-NEXT:    buffer_gl1_inv
14584; GFX11-CU-NEXT:    buffer_gl0_inv
14585; GFX11-CU-NEXT:    s_endpgm
14586;
14587; GFX12-WGP-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14588; GFX12-WGP:       ; %bb.0: ; %entry
14589; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
14590; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14591; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14592; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14593; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
14594; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
14595; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
14596; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14597; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
14598; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
14599; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
14600; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
14601; GFX12-WGP-NEXT:    s_endpgm
14602;
14603; GFX12-CU-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
14604; GFX12-CU:       ; %bb.0: ; %entry
14605; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
14606; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14607; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14608; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14609; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
14610; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
14611; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
14612; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14613; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
14614; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
14615; GFX12-CU-NEXT:    s_wait_storecnt 0x0
14616; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
14617; GFX12-CU-NEXT:    s_endpgm
14618    ptr addrspace(1) %out, i32 %in, i32 %old) {
14619entry:
  ; Address of the fifth i32 element of %out (byte offset 16).
14620  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Acquire on success, monotonic on failure; the returned pair is unused.
14621  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
14622  ret void
14623}
14624
14625define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg(
14626; GFX6-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14627; GFX6:       ; %bb.0: ; %entry
14628; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
14629; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
14630; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
14631; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
14632; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
14633; GFX6-NEXT:    s_mov_b32 s12, s5
14634; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
14635; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
14636; GFX6-NEXT:    s_mov_b32 s11, -1
14637; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
14638; GFX6-NEXT:    s_mov_b32 s5, s12
14639; GFX6-NEXT:    s_mov_b32 s6, s11
14640; GFX6-NEXT:    s_mov_b32 s7, s10
14641; GFX6-NEXT:    v_mov_b32_e32 v0, s9
14642; GFX6-NEXT:    v_mov_b32_e32 v2, s8
14643; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14644; GFX6-NEXT:    v_mov_b32_e32 v1, v2
14645; GFX6-NEXT:    s_waitcnt vmcnt(0)
14646; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14647; GFX6-NEXT:    s_endpgm
14648;
14649; GFX7-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14650; GFX7:       ; %bb.0: ; %entry
14651; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
14652; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
14653; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
14654; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
14655; GFX7-NEXT:    s_mov_b64 s[10:11], 16
14656; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
14657; GFX7-NEXT:    s_mov_b32 s4, s8
14658; GFX7-NEXT:    s_mov_b32 s5, s9
14659; GFX7-NEXT:    s_mov_b32 s9, s10
14660; GFX7-NEXT:    s_mov_b32 s8, s11
14661; GFX7-NEXT:    s_add_u32 s4, s4, s9
14662; GFX7-NEXT:    s_addc_u32 s8, s5, s8
14663; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
14664; GFX7-NEXT:    s_mov_b32 s5, s8
14665; GFX7-NEXT:    v_mov_b32_e32 v2, s7
14666; GFX7-NEXT:    v_mov_b32_e32 v0, s6
14667; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14668; GFX7-NEXT:    v_mov_b32_e32 v3, v0
14669; GFX7-NEXT:    v_mov_b32_e32 v0, s4
14670; GFX7-NEXT:    v_mov_b32_e32 v1, s5
14671; GFX7-NEXT:    s_waitcnt vmcnt(0)
14672; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
14673; GFX7-NEXT:    s_endpgm
14674;
14675; GFX10-WGP-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14676; GFX10-WGP:       ; %bb.0: ; %entry
14677; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
14678; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14679; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
14680; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
14681; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14682; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
14683; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
14684; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14685; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
14686; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
14687; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14688; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14689; GFX10-WGP-NEXT:    s_endpgm
14690;
14691; GFX10-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14692; GFX10-CU:       ; %bb.0: ; %entry
14693; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
14694; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14695; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
14696; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
14697; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14698; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
14699; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
14700; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14701; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
14702; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
14703; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14704; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14705; GFX10-CU-NEXT:    s_endpgm
14706;
14707; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14708; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14709; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
14710; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
14711; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
14712; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
14713; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14714; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
14715; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14716; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
14717; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
14718; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14719; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
14720; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
14721; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
14722; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
14723; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
14724; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14725; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
14726; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14727; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14728; SKIP-CACHE-INV-NEXT:    s_endpgm
14729;
14730; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14731; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14732; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14733; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14734; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14735; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14736; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14737; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14738; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14739; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14740; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14741; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14742; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14743; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14744;
14745; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14746; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
14747; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14748; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14749; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14750; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14751; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14752; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14753; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14754; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14755; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14756; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14757; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14758; GFX90A-TGSPLIT-NEXT:    s_endpgm
14759;
14760; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14761; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
14762; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14763; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14764; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14765; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14766; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14767; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14768; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14769; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14770; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14771; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
14772; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14773; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14774; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
14775;
14776; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14777; GFX940-TGSPLIT:       ; %bb.0: ; %entry
14778; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14779; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14780; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14781; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14782; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14783; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14784; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14785; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14786; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14787; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
14788; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14789; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
14790; GFX940-TGSPLIT-NEXT:    s_endpgm
14791;
14792; GFX11-WGP-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14793; GFX11-WGP:       ; %bb.0: ; %entry
14794; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
14795; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14796; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14797; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14798; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14799; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
14800; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
14801; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14802; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
14803; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
14804; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14805; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14806; GFX11-WGP-NEXT:    s_endpgm
14807;
14808; GFX11-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14809; GFX11-CU:       ; %bb.0: ; %entry
14810; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
14811; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14812; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14813; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14814; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
14815; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
14816; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
14817; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14818; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
14819; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
14820; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14821; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14822; GFX11-CU-NEXT:    s_endpgm
14823;
14824; GFX12-WGP-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14825; GFX12-WGP:       ; %bb.0: ; %entry
14826; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
14827; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14828; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14829; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14830; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
14831; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
14832; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
14833; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14834; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
14835; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
14836; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
14837; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
14838; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
14839; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
14840; GFX12-WGP-NEXT:    s_endpgm
14841;
14842; GFX12-CU-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
14843; GFX12-CU:       ; %bb.0: ; %entry
14844; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
14845; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14846; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14847; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14848; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
14849; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
14850; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
14851; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14852; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
14853; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
14854; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
14855; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
14856; GFX12-CU-NEXT:    s_wait_storecnt 0x0
14857; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
14858; GFX12-CU-NEXT:    s_endpgm
14859    ptr addrspace(1) %out, i32 %in, i32 %old) {
14860entry:
14861  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
14862  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic
14863  ret void
14864}
14865
; Kernel under test: a volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("agent-one-as"), success ordering acq_rel and failure ordering
; monotonic. The cmpxchg result (%val) is never used.
; The prefixed assertion lines inside this definition are autogenerated (see the
; NOTE on the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
14866define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg(
14867; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
14868; GFX6:       ; %bb.0: ; %entry
14869; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
14870; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
14871; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
14872; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
14873; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
14874; GFX6-NEXT:    s_mov_b32 s12, s5
14875; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
14876; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
14877; GFX6-NEXT:    s_mov_b32 s11, -1
14878; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
14879; GFX6-NEXT:    s_mov_b32 s5, s12
14880; GFX6-NEXT:    s_mov_b32 s6, s11
14881; GFX6-NEXT:    s_mov_b32 s7, s10
14882; GFX6-NEXT:    v_mov_b32_e32 v0, s9
14883; GFX6-NEXT:    v_mov_b32_e32 v2, s8
14884; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14885; GFX6-NEXT:    v_mov_b32_e32 v1, v2
14886; GFX6-NEXT:    s_waitcnt vmcnt(0)
14887; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14888; GFX6-NEXT:    s_waitcnt vmcnt(0)
14889; GFX6-NEXT:    buffer_wbinvl1
14890; GFX6-NEXT:    s_endpgm
14891;
14892; GFX7-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
14893; GFX7:       ; %bb.0: ; %entry
14894; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
14895; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
14896; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
14897; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
14898; GFX7-NEXT:    s_mov_b64 s[10:11], 16
14899; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
14900; GFX7-NEXT:    s_mov_b32 s4, s8
14901; GFX7-NEXT:    s_mov_b32 s5, s9
14902; GFX7-NEXT:    s_mov_b32 s9, s10
14903; GFX7-NEXT:    s_mov_b32 s8, s11
14904; GFX7-NEXT:    s_add_u32 s4, s4, s9
14905; GFX7-NEXT:    s_addc_u32 s8, s5, s8
14906; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
14907; GFX7-NEXT:    s_mov_b32 s5, s8
14908; GFX7-NEXT:    v_mov_b32_e32 v2, s7
14909; GFX7-NEXT:    v_mov_b32_e32 v0, s6
14910; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14911; GFX7-NEXT:    v_mov_b32_e32 v3, v0
14912; GFX7-NEXT:    v_mov_b32_e32 v0, s4
14913; GFX7-NEXT:    v_mov_b32_e32 v1, s5
14914; GFX7-NEXT:    s_waitcnt vmcnt(0)
14915; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
14916; GFX7-NEXT:    s_waitcnt vmcnt(0)
14917; GFX7-NEXT:    buffer_wbinvl1_vol
14918; GFX7-NEXT:    s_endpgm
14919;
14920; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
14921; GFX10-WGP:       ; %bb.0: ; %entry
14922; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
14923; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14924; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
14925; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
14926; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14927; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
14928; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
14929; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14930; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
14931; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
14932; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14933; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14934; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14935; GFX10-WGP-NEXT:    buffer_gl1_inv
14936; GFX10-WGP-NEXT:    buffer_gl0_inv
14937; GFX10-WGP-NEXT:    s_endpgm
14938;
14939; GFX10-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
14940; GFX10-CU:       ; %bb.0: ; %entry
14941; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
14942; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14943; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
14944; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
14945; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14946; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
14947; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
14948; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14949; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
14950; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
14951; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14952; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14953; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14954; GFX10-CU-NEXT:    buffer_gl1_inv
14955; GFX10-CU-NEXT:    buffer_gl0_inv
14956; GFX10-CU-NEXT:    s_endpgm
14957;
14958; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
14959; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14960; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
14961; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
14962; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
14963; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
14964; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14965; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
14966; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14967; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
14968; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
14969; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14970; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
14971; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
14972; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
14973; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
14974; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
14975; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14976; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
14977; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14978; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14979; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14980; SKIP-CACHE-INV-NEXT:    s_endpgm
14981;
14982; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
14983; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14984; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14985; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14986; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14987; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14988; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14989; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14990; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14991; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14992; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14993; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14994; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14995; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14996; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
14997; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14998;
14999; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
15000; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
15001; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15002; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15003; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15004; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15005; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15006; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15007; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15008; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15009; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15010; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15011; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15012; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15013; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
15014; GFX90A-TGSPLIT-NEXT:    s_endpgm
15015;
15016; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
15017; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
15018; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15019; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15020; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15021; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15022; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15023; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15024; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15025; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15026; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15027; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
15028; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15029; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15030; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15031; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
15032; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
15033;
15034; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
15035; GFX940-TGSPLIT:       ; %bb.0: ; %entry
15036; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15037; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15038; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15039; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15040; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15041; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15042; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15043; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15044; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15045; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
15046; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15047; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15048; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15049; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
15050; GFX940-TGSPLIT-NEXT:    s_endpgm
15051;
15052; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
15053; GFX11-WGP:       ; %bb.0: ; %entry
15054; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
15055; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15056; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15057; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15058; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15059; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
15060; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
15061; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15062; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
15063; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
15064; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15065; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15066; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15067; GFX11-WGP-NEXT:    buffer_gl1_inv
15068; GFX11-WGP-NEXT:    buffer_gl0_inv
15069; GFX11-WGP-NEXT:    s_endpgm
15070;
15071; GFX11-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
15072; GFX11-CU:       ; %bb.0: ; %entry
15073; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
15074; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15075; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15076; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15077; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
15078; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
15079; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
15080; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15081; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
15082; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
15083; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15084; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15085; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15086; GFX11-CU-NEXT:    buffer_gl1_inv
15087; GFX11-CU-NEXT:    buffer_gl0_inv
15088; GFX11-CU-NEXT:    s_endpgm
15089;
15090; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
15091; GFX12-WGP:       ; %bb.0: ; %entry
15092; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
15093; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15094; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15095; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15096; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
15097; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
15098; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
15099; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15100; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
15101; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
15102; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
15103; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
15104; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15105; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
15106; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15107; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
15108; GFX12-WGP-NEXT:    s_endpgm
15109;
15110; GFX12-CU-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
15111; GFX12-CU:       ; %bb.0: ; %entry
15112; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
15113; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15114; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15115; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15116; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
15117; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
15118; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
15119; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15120; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
15121; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
15122; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
15123; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
15124; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15125; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
15126; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15127; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
15128; GFX12-CU-NEXT:    s_endpgm
15129    ptr addrspace(1) %out, i32 %in, i32 %old) {
15130entry:
; Address i32 element 4 of %out (4 x 4 bytes = byte offset 16, which shows up
; as the 16-byte offset in the expected instruction sequences above).
15131  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare against %old and store %in on a match; acq_rel on success,
; monotonic on failure, at agent-one-as scope.
15132  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
15133  ret void
15134}
15135
; Kernel under test: a volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("agent-one-as"), success ordering seq_cst and failure ordering
; monotonic. The cmpxchg result (%val) is never used.
; The prefixed assertion lines inside this definition are autogenerated (see the
; NOTE on the first line of the file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
15136define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg(
15137; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15138; GFX6:       ; %bb.0: ; %entry
15139; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
15140; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
15141; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
15142; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
15143; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
15144; GFX6-NEXT:    s_mov_b32 s12, s5
15145; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
15146; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
15147; GFX6-NEXT:    s_mov_b32 s11, -1
15148; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
15149; GFX6-NEXT:    s_mov_b32 s5, s12
15150; GFX6-NEXT:    s_mov_b32 s6, s11
15151; GFX6-NEXT:    s_mov_b32 s7, s10
15152; GFX6-NEXT:    v_mov_b32_e32 v0, s9
15153; GFX6-NEXT:    v_mov_b32_e32 v2, s8
15154; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15155; GFX6-NEXT:    v_mov_b32_e32 v1, v2
15156; GFX6-NEXT:    s_waitcnt vmcnt(0)
15157; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15158; GFX6-NEXT:    s_waitcnt vmcnt(0)
15159; GFX6-NEXT:    buffer_wbinvl1
15160; GFX6-NEXT:    s_endpgm
15161;
15162; GFX7-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15163; GFX7:       ; %bb.0: ; %entry
15164; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
15165; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
15166; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
15167; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
15168; GFX7-NEXT:    s_mov_b64 s[10:11], 16
15169; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
15170; GFX7-NEXT:    s_mov_b32 s4, s8
15171; GFX7-NEXT:    s_mov_b32 s5, s9
15172; GFX7-NEXT:    s_mov_b32 s9, s10
15173; GFX7-NEXT:    s_mov_b32 s8, s11
15174; GFX7-NEXT:    s_add_u32 s4, s4, s9
15175; GFX7-NEXT:    s_addc_u32 s8, s5, s8
15176; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
15177; GFX7-NEXT:    s_mov_b32 s5, s8
15178; GFX7-NEXT:    v_mov_b32_e32 v2, s7
15179; GFX7-NEXT:    v_mov_b32_e32 v0, s6
15180; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15181; GFX7-NEXT:    v_mov_b32_e32 v3, v0
15182; GFX7-NEXT:    v_mov_b32_e32 v0, s4
15183; GFX7-NEXT:    v_mov_b32_e32 v1, s5
15184; GFX7-NEXT:    s_waitcnt vmcnt(0)
15185; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
15186; GFX7-NEXT:    s_waitcnt vmcnt(0)
15187; GFX7-NEXT:    buffer_wbinvl1_vol
15188; GFX7-NEXT:    s_endpgm
15189;
15190; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15191; GFX10-WGP:       ; %bb.0: ; %entry
15192; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
15193; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15194; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
15195; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
15196; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15197; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
15198; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
15199; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15200; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
15201; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
15202; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15203; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15204; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15205; GFX10-WGP-NEXT:    buffer_gl1_inv
15206; GFX10-WGP-NEXT:    buffer_gl0_inv
15207; GFX10-WGP-NEXT:    s_endpgm
15208;
15209; GFX10-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15210; GFX10-CU:       ; %bb.0: ; %entry
15211; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
15212; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15213; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
15214; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
15215; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
15216; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
15217; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
15218; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15219; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
15220; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
15221; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15222; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15223; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15224; GFX10-CU-NEXT:    buffer_gl1_inv
15225; GFX10-CU-NEXT:    buffer_gl0_inv
15226; GFX10-CU-NEXT:    s_endpgm
15227;
15228; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15229; SKIP-CACHE-INV:       ; %bb.0: ; %entry
15230; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
15231; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
15232; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
15233; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
15234; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
15235; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
15236; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
15237; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
15238; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
15239; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
15240; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
15241; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
15242; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
15243; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
15244; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
15245; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15246; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
15247; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15248; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15249; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15250; SKIP-CACHE-INV-NEXT:    s_endpgm
15251;
15252; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15253; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
15254; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15255; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15256; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15257; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15258; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15259; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15260; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15261; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15262; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15263; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15264; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15265; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15266; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
15267; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
15268;
15269; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15270; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
15271; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15272; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15273; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15274; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15275; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15276; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15277; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15278; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15279; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15280; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15281; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15282; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15283; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
15284; GFX90A-TGSPLIT-NEXT:    s_endpgm
15285;
15286; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15287; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
15288; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15289; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15290; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15291; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15292; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15293; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15294; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15295; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15296; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15297; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
15298; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15299; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15300; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15301; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
15302; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
15303;
15304; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15305; GFX940-TGSPLIT:       ; %bb.0: ; %entry
15306; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15307; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15308; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15309; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15310; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15311; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15312; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15313; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15314; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15315; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
15316; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15317; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15318; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15319; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
15320; GFX940-TGSPLIT-NEXT:    s_endpgm
15321;
15322; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15323; GFX11-WGP:       ; %bb.0: ; %entry
15324; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
15325; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15326; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15327; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15328; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15329; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
15330; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
15331; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15332; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
15333; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
15334; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15335; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15336; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15337; GFX11-WGP-NEXT:    buffer_gl1_inv
15338; GFX11-WGP-NEXT:    buffer_gl0_inv
15339; GFX11-WGP-NEXT:    s_endpgm
15340;
15341; GFX11-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15342; GFX11-CU:       ; %bb.0: ; %entry
15343; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
15344; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15345; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15346; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15347; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
15348; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
15349; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
15350; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15351; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
15352; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
15353; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15354; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15355; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15356; GFX11-CU-NEXT:    buffer_gl1_inv
15357; GFX11-CU-NEXT:    buffer_gl0_inv
15358; GFX11-CU-NEXT:    s_endpgm
15359;
15360; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15361; GFX12-WGP:       ; %bb.0: ; %entry
15362; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
15363; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15364; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15365; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15366; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
15367; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
15368; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
15369; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15370; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
15371; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
15372; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
15373; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
15374; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15375; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
15376; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15377; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
15378; GFX12-WGP-NEXT:    s_endpgm
15379;
15380; GFX12-CU-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
15381; GFX12-CU:       ; %bb.0: ; %entry
15382; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
15383; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15384; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15385; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15386; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
15387; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
15388; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
15389; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15390; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
15391; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
15392; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
15393; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
15394; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15395; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
15396; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15397; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
15398; GFX12-CU-NEXT:    s_endpgm
15399    ptr addrspace(1) %out, i32 %in, i32 %old) {
15400entry:
; Address i32 element 4 of %out (4 x 4 bytes = byte offset 16, which shows up
; as the 16-byte offset in the expected instruction sequences above).
15401  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare against %old and store %in on a match; seq_cst on success,
; monotonic on failure, at agent-one-as scope.
15402  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
15403  ret void
15404}
15405
; Agent-scope ("agent-one-as") cmpxchg on a global (addrspace(1)) pointer with
; success ordering monotonic and failure ordering acquire.
;
; In every configuration the expected code issues the compare-and-swap with no
; wait directly in front of it, then emits a counter wait (s_waitcnt vmcnt(0),
; s_waitcnt_vscnt null, 0x0 or s_wait_storecnt 0x0) followed by a cache
; invalidate (buffer_wbinvl1, buffer_wbinvl1_vol, buffer_gl1_inv +
; buffer_gl0_inv, buffer_inv sc1 or global_inv scope:SCOPE_DEV, depending on
; the target). The run that passes -amdgcn-skip-cache-invalidations keeps the
; wait but expects no invalidate before s_endpgm.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
15406define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg(
15407; GFX6-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15408; GFX6:       ; %bb.0: ; %entry
15409; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
15410; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
15411; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
15412; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
15413; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
15414; GFX6-NEXT:    s_mov_b32 s12, s5
15415; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
15416; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
15417; GFX6-NEXT:    s_mov_b32 s11, -1
15418; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
15419; GFX6-NEXT:    s_mov_b32 s5, s12
15420; GFX6-NEXT:    s_mov_b32 s6, s11
15421; GFX6-NEXT:    s_mov_b32 s7, s10
15422; GFX6-NEXT:    v_mov_b32_e32 v0, s9
15423; GFX6-NEXT:    v_mov_b32_e32 v2, s8
15424; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15425; GFX6-NEXT:    v_mov_b32_e32 v1, v2
15426; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15427; GFX6-NEXT:    s_waitcnt vmcnt(0)
15428; GFX6-NEXT:    buffer_wbinvl1
15429; GFX6-NEXT:    s_endpgm
15430;
15431; GFX7-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15432; GFX7:       ; %bb.0: ; %entry
15433; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
15434; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
15435; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
15436; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
15437; GFX7-NEXT:    s_mov_b64 s[10:11], 16
15438; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
15439; GFX7-NEXT:    s_mov_b32 s4, s8
15440; GFX7-NEXT:    s_mov_b32 s5, s9
15441; GFX7-NEXT:    s_mov_b32 s9, s10
15442; GFX7-NEXT:    s_mov_b32 s8, s11
15443; GFX7-NEXT:    s_add_u32 s4, s4, s9
15444; GFX7-NEXT:    s_addc_u32 s8, s5, s8
15445; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
15446; GFX7-NEXT:    s_mov_b32 s5, s8
15447; GFX7-NEXT:    v_mov_b32_e32 v2, s7
15448; GFX7-NEXT:    v_mov_b32_e32 v0, s6
15449; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15450; GFX7-NEXT:    v_mov_b32_e32 v3, v0
15451; GFX7-NEXT:    v_mov_b32_e32 v0, s4
15452; GFX7-NEXT:    v_mov_b32_e32 v1, s5
15453; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
15454; GFX7-NEXT:    s_waitcnt vmcnt(0)
15455; GFX7-NEXT:    buffer_wbinvl1_vol
15456; GFX7-NEXT:    s_endpgm
15457;
15458; GFX10-WGP-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15459; GFX10-WGP:       ; %bb.0: ; %entry
15460; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
15461; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15462; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
15463; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
15464; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15465; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
15466; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
15467; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15468; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
15469; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15470; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15471; GFX10-WGP-NEXT:    buffer_gl1_inv
15472; GFX10-WGP-NEXT:    buffer_gl0_inv
15473; GFX10-WGP-NEXT:    s_endpgm
15474;
15475; GFX10-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15476; GFX10-CU:       ; %bb.0: ; %entry
15477; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
15478; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15479; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
15480; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
15481; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
15482; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
15483; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
15484; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15485; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
15486; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15487; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15488; GFX10-CU-NEXT:    buffer_gl1_inv
15489; GFX10-CU-NEXT:    buffer_gl0_inv
15490; GFX10-CU-NEXT:    s_endpgm
15491;
15492; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15493; SKIP-CACHE-INV:       ; %bb.0: ; %entry
15494; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
15495; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
15496; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
15497; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
15498; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
15499; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
15500; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
15501; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
15502; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
15503; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
15504; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
15505; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
15506; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
15507; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
15508; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
15509; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15510; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
15511; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15512; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15513; SKIP-CACHE-INV-NEXT:    s_endpgm
15514;
15515; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15516; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
15517; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15518; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15519; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15520; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15521; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15522; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15523; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15524; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15525; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15526; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15527; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15528; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
15529; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
15530;
15531; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15532; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
15533; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15534; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15535; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15536; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15537; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15538; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15539; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15540; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15541; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15542; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15543; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15544; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
15545; GFX90A-TGSPLIT-NEXT:    s_endpgm
15546;
15547; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15548; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
15549; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15550; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15551; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15552; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15553; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15554; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15555; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15556; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15557; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15558; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15559; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15560; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
15561; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
15562;
15563; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15564; GFX940-TGSPLIT:       ; %bb.0: ; %entry
15565; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15566; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15567; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15568; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15569; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15570; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15571; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15572; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15573; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15574; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15575; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15576; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
15577; GFX940-TGSPLIT-NEXT:    s_endpgm
15578;
15579; GFX11-WGP-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15580; GFX11-WGP:       ; %bb.0: ; %entry
15581; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
15582; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15583; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15584; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15585; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15586; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
15587; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
15588; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15589; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
15590; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15591; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15592; GFX11-WGP-NEXT:    buffer_gl1_inv
15593; GFX11-WGP-NEXT:    buffer_gl0_inv
15594; GFX11-WGP-NEXT:    s_endpgm
15595;
15596; GFX11-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15597; GFX11-CU:       ; %bb.0: ; %entry
15598; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
15599; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15600; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15601; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15602; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
15603; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
15604; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
15605; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15606; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
15607; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15608; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15609; GFX11-CU-NEXT:    buffer_gl1_inv
15610; GFX11-CU-NEXT:    buffer_gl0_inv
15611; GFX11-CU-NEXT:    s_endpgm
15612;
15613; GFX12-WGP-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15614; GFX12-WGP:       ; %bb.0: ; %entry
15615; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
15616; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15617; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15618; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15619; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
15620; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
15621; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
15622; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15623; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
15624; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
15625; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15626; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
15627; GFX12-WGP-NEXT:    s_endpgm
15628;
15629; GFX12-CU-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
15630; GFX12-CU:       ; %bb.0: ; %entry
15631; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
15632; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15633; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15634; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15635; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
15636; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
15637; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
15638; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15639; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
15640; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
15641; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15642; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
15643; GFX12-CU-NEXT:    s_endpgm
15644    ptr addrspace(1) %out, i32 %in, i32 %old) {
15645entry:
  ; Element 4 of an i32 array, i.e. 16 bytes past %out. The expected output
  ; shows this as offset:16 on the atomic, or as an explicit 64-bit add of 16
  ; where flat_atomic_cmpswap is used.
15646  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old, store %in on a match; the result %val is unused.
15647  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire
15648  ret void
15649}
15650
; Agent-scope ("agent-one-as") cmpxchg on a global (addrspace(1)) pointer with
; acquire ordering for both the success and the failure case.
;
; In every configuration the expected code issues the compare-and-swap with no
; wait directly in front of it, then emits a counter wait (s_waitcnt vmcnt(0),
; s_waitcnt_vscnt null, 0x0 or s_wait_storecnt 0x0) followed by a cache
; invalidate (buffer_wbinvl1, buffer_wbinvl1_vol, buffer_gl1_inv +
; buffer_gl0_inv, buffer_inv sc1 or global_inv scope:SCOPE_DEV, depending on
; the target). The run that passes -amdgcn-skip-cache-invalidations keeps the
; wait but expects no invalidate before s_endpgm.
;
; The assertions below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
15651define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg(
15652; GFX6-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15653; GFX6:       ; %bb.0: ; %entry
15654; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
15655; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
15656; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
15657; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
15658; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
15659; GFX6-NEXT:    s_mov_b32 s12, s5
15660; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
15661; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
15662; GFX6-NEXT:    s_mov_b32 s11, -1
15663; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
15664; GFX6-NEXT:    s_mov_b32 s5, s12
15665; GFX6-NEXT:    s_mov_b32 s6, s11
15666; GFX6-NEXT:    s_mov_b32 s7, s10
15667; GFX6-NEXT:    v_mov_b32_e32 v0, s9
15668; GFX6-NEXT:    v_mov_b32_e32 v2, s8
15669; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15670; GFX6-NEXT:    v_mov_b32_e32 v1, v2
15671; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15672; GFX6-NEXT:    s_waitcnt vmcnt(0)
15673; GFX6-NEXT:    buffer_wbinvl1
15674; GFX6-NEXT:    s_endpgm
15675;
15676; GFX7-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15677; GFX7:       ; %bb.0: ; %entry
15678; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
15679; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
15680; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
15681; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
15682; GFX7-NEXT:    s_mov_b64 s[10:11], 16
15683; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
15684; GFX7-NEXT:    s_mov_b32 s4, s8
15685; GFX7-NEXT:    s_mov_b32 s5, s9
15686; GFX7-NEXT:    s_mov_b32 s9, s10
15687; GFX7-NEXT:    s_mov_b32 s8, s11
15688; GFX7-NEXT:    s_add_u32 s4, s4, s9
15689; GFX7-NEXT:    s_addc_u32 s8, s5, s8
15690; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
15691; GFX7-NEXT:    s_mov_b32 s5, s8
15692; GFX7-NEXT:    v_mov_b32_e32 v2, s7
15693; GFX7-NEXT:    v_mov_b32_e32 v0, s6
15694; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15695; GFX7-NEXT:    v_mov_b32_e32 v3, v0
15696; GFX7-NEXT:    v_mov_b32_e32 v0, s4
15697; GFX7-NEXT:    v_mov_b32_e32 v1, s5
15698; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
15699; GFX7-NEXT:    s_waitcnt vmcnt(0)
15700; GFX7-NEXT:    buffer_wbinvl1_vol
15701; GFX7-NEXT:    s_endpgm
15702;
15703; GFX10-WGP-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15704; GFX10-WGP:       ; %bb.0: ; %entry
15705; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
15706; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15707; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
15708; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
15709; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15710; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
15711; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
15712; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15713; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
15714; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15715; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15716; GFX10-WGP-NEXT:    buffer_gl1_inv
15717; GFX10-WGP-NEXT:    buffer_gl0_inv
15718; GFX10-WGP-NEXT:    s_endpgm
15719;
15720; GFX10-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15721; GFX10-CU:       ; %bb.0: ; %entry
15722; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
15723; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15724; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
15725; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
15726; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
15727; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
15728; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
15729; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15730; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
15731; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15732; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15733; GFX10-CU-NEXT:    buffer_gl1_inv
15734; GFX10-CU-NEXT:    buffer_gl0_inv
15735; GFX10-CU-NEXT:    s_endpgm
15736;
15737; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15738; SKIP-CACHE-INV:       ; %bb.0: ; %entry
15739; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
15740; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
15741; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
15742; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
15743; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
15744; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
15745; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
15746; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
15747; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
15748; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
15749; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
15750; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
15751; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
15752; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
15753; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
15754; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15755; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
15756; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15757; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15758; SKIP-CACHE-INV-NEXT:    s_endpgm
15759;
15760; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15761; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
15762; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15763; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15764; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15765; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15766; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15767; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15768; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15769; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15770; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15771; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15772; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15773; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
15774; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
15775;
15776; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15777; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
15778; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15779; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15780; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15781; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15782; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15783; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15784; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15785; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15786; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15787; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15788; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15789; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
15790; GFX90A-TGSPLIT-NEXT:    s_endpgm
15791;
15792; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15793; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
15794; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15795; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15796; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15797; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15798; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15799; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15800; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15801; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15802; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15803; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15804; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15805; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
15806; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
15807;
15808; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15809; GFX940-TGSPLIT:       ; %bb.0: ; %entry
15810; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15811; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15812; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15813; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15814; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15815; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15816; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15817; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15818; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15819; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
15820; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15821; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
15822; GFX940-TGSPLIT-NEXT:    s_endpgm
15823;
15824; GFX11-WGP-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15825; GFX11-WGP:       ; %bb.0: ; %entry
15826; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
15827; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15828; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15829; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15830; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15831; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
15832; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
15833; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15834; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
15835; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15836; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15837; GFX11-WGP-NEXT:    buffer_gl1_inv
15838; GFX11-WGP-NEXT:    buffer_gl0_inv
15839; GFX11-WGP-NEXT:    s_endpgm
15840;
15841; GFX11-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15842; GFX11-CU:       ; %bb.0: ; %entry
15843; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
15844; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15845; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15846; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15847; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
15848; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
15849; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
15850; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15851; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
15852; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15853; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15854; GFX11-CU-NEXT:    buffer_gl1_inv
15855; GFX11-CU-NEXT:    buffer_gl0_inv
15856; GFX11-CU-NEXT:    s_endpgm
15857;
15858; GFX12-WGP-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15859; GFX12-WGP:       ; %bb.0: ; %entry
15860; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
15861; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15862; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15863; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15864; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
15865; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
15866; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
15867; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15868; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
15869; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
15870; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15871; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
15872; GFX12-WGP-NEXT:    s_endpgm
15873;
15874; GFX12-CU-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
15875; GFX12-CU:       ; %bb.0: ; %entry
15876; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
15877; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15878; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15879; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15880; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
15881; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
15882; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
15883; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15884; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
15885; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
15886; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15887; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
15888; GFX12-CU-NEXT:    s_endpgm
15889    ptr addrspace(1) %out, i32 %in, i32 %old) {
15890entry:
  ; Element 4 of an i32 array, i.e. 16 bytes past %out. The expected output
  ; shows this as offset:16 on the atomic, or as an explicit 64-bit add of 16
  ; where flat_atomic_cmpswap is used.
15891  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old, store %in on a match; the result %val is unused.
15892  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
15893  ret void
15894}
15895
15896define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg(
15897; GFX6-LABEL: global_agent_one_as_release_acquire_cmpxchg:
15898; GFX6:       ; %bb.0: ; %entry
15899; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
15900; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
15901; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
15902; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
15903; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
15904; GFX6-NEXT:    s_mov_b32 s12, s5
15905; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
15906; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
15907; GFX6-NEXT:    s_mov_b32 s11, -1
15908; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
15909; GFX6-NEXT:    s_mov_b32 s5, s12
15910; GFX6-NEXT:    s_mov_b32 s6, s11
15911; GFX6-NEXT:    s_mov_b32 s7, s10
15912; GFX6-NEXT:    v_mov_b32_e32 v0, s9
15913; GFX6-NEXT:    v_mov_b32_e32 v2, s8
15914; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15915; GFX6-NEXT:    v_mov_b32_e32 v1, v2
15916; GFX6-NEXT:    s_waitcnt vmcnt(0)
15917; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15918; GFX6-NEXT:    s_waitcnt vmcnt(0)
15919; GFX6-NEXT:    buffer_wbinvl1
15920; GFX6-NEXT:    s_endpgm
15921;
15922; GFX7-LABEL: global_agent_one_as_release_acquire_cmpxchg:
15923; GFX7:       ; %bb.0: ; %entry
15924; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
15925; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
15926; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
15927; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
15928; GFX7-NEXT:    s_mov_b64 s[10:11], 16
15929; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
15930; GFX7-NEXT:    s_mov_b32 s4, s8
15931; GFX7-NEXT:    s_mov_b32 s5, s9
15932; GFX7-NEXT:    s_mov_b32 s9, s10
15933; GFX7-NEXT:    s_mov_b32 s8, s11
15934; GFX7-NEXT:    s_add_u32 s4, s4, s9
15935; GFX7-NEXT:    s_addc_u32 s8, s5, s8
15936; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
15937; GFX7-NEXT:    s_mov_b32 s5, s8
15938; GFX7-NEXT:    v_mov_b32_e32 v2, s7
15939; GFX7-NEXT:    v_mov_b32_e32 v0, s6
15940; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15941; GFX7-NEXT:    v_mov_b32_e32 v3, v0
15942; GFX7-NEXT:    v_mov_b32_e32 v0, s4
15943; GFX7-NEXT:    v_mov_b32_e32 v1, s5
15944; GFX7-NEXT:    s_waitcnt vmcnt(0)
15945; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
15946; GFX7-NEXT:    s_waitcnt vmcnt(0)
15947; GFX7-NEXT:    buffer_wbinvl1_vol
15948; GFX7-NEXT:    s_endpgm
15949;
15950; GFX10-WGP-LABEL: global_agent_one_as_release_acquire_cmpxchg:
15951; GFX10-WGP:       ; %bb.0: ; %entry
15952; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
15953; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15954; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
15955; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
15956; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15957; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
15958; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
15959; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15960; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
15961; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
15962; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15963; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15964; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15965; GFX10-WGP-NEXT:    buffer_gl1_inv
15966; GFX10-WGP-NEXT:    buffer_gl0_inv
15967; GFX10-WGP-NEXT:    s_endpgm
15968;
15969; GFX10-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg:
15970; GFX10-CU:       ; %bb.0: ; %entry
15971; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
15972; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15973; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
15974; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
15975; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
15976; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
15977; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
15978; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15979; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
15980; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
15981; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15982; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15983; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15984; GFX10-CU-NEXT:    buffer_gl1_inv
15985; GFX10-CU-NEXT:    buffer_gl0_inv
15986; GFX10-CU-NEXT:    s_endpgm
15987;
15988; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_cmpxchg:
15989; SKIP-CACHE-INV:       ; %bb.0: ; %entry
15990; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
15991; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
15992; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
15993; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
15994; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
15995; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
15996; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
15997; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
15998; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
15999; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
16000; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
16001; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
16002; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
16003; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
16004; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
16005; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16006; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
16007; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16008; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
16009; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16010; SKIP-CACHE-INV-NEXT:    s_endpgm
16011;
16012; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg:
16013; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
16014; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16015; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16016; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16017; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16018; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16019; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16020; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16021; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16022; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16023; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16024; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16025; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16026; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
16027; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
16028;
16029; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg:
16030; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
16031; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16032; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16033; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16034; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16035; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16036; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16037; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16038; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16039; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16040; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16041; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16042; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16043; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
16044; GFX90A-TGSPLIT-NEXT:    s_endpgm
16045;
16046; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg:
16047; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
16048; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16049; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16050; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16051; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16052; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16053; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16054; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16055; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16056; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16057; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
16058; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16059; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
16060; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16061; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
16062; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
16063;
16064; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_acquire_cmpxchg:
16065; GFX940-TGSPLIT:       ; %bb.0: ; %entry
16066; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16067; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16068; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16069; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16070; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16071; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16072; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16073; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16074; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16075; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
16076; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16077; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
16078; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16079; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
16080; GFX940-TGSPLIT-NEXT:    s_endpgm
16081;
16082; GFX11-WGP-LABEL: global_agent_one_as_release_acquire_cmpxchg:
16083; GFX11-WGP:       ; %bb.0: ; %entry
16084; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
16085; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16086; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16087; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16088; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16089; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
16090; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
16091; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16092; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
16093; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
16094; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16095; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16096; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16097; GFX11-WGP-NEXT:    buffer_gl1_inv
16098; GFX11-WGP-NEXT:    buffer_gl0_inv
16099; GFX11-WGP-NEXT:    s_endpgm
16100;
16101; GFX11-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg:
16102; GFX11-CU:       ; %bb.0: ; %entry
16103; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
16104; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16105; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16106; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16107; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
16108; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
16109; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
16110; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16111; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
16112; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
16113; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16114; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16115; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16116; GFX11-CU-NEXT:    buffer_gl1_inv
16117; GFX11-CU-NEXT:    buffer_gl0_inv
16118; GFX11-CU-NEXT:    s_endpgm
16119;
16120; GFX12-WGP-LABEL: global_agent_one_as_release_acquire_cmpxchg:
16121; GFX12-WGP:       ; %bb.0: ; %entry
16122; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
16123; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16124; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16125; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16126; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
16127; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
16128; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
16129; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16130; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
16131; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
16132; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
16133; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
16134; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16135; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
16136; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16137; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
16138; GFX12-WGP-NEXT:    s_endpgm
16139;
16140; GFX12-CU-LABEL: global_agent_one_as_release_acquire_cmpxchg:
16141; GFX12-CU:       ; %bb.0: ; %entry
16142; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
16143; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16144; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16145; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16146; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
16147; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
16148; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
16149; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16150; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
16151; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
16152; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
16153; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
16154; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16155; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
16156; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16157; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
16158; GFX12-CU-NEXT:    s_endpgm
16159    ptr addrspace(1) %out, i32 %in, i32 %old) {
16160entry:
16161  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
16162  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
16163  ret void
16164}
16165
; Test case: a volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("agent-one-as"), success ordering acq_rel and failure ordering
; acquire. The value returned by the cmpxchg is not used.
;
; The check lines inside the definition below are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of this
; file); regenerate them with that script rather than editing them by hand.
; There is one group of check lines per llc invocation listed in the file
; header.
;
; What the expected output shows: every group waits on the outstanding memory
; counters before the atomic and again after it. All groups except the one for
; the -amdgcn-skip-cache-invalidations invocation then issue a cache
; invalidate (buffer_wbinvl1, buffer_wbinvl1_vol, buffer_gl1_inv plus
; buffer_gl0_inv, buffer_inv sc1, or global_inv scope:SCOPE_DEV) before
; s_endpgm; the skip-cache-invalidations group keeps only the wait. The two
; gfx940 groups additionally expect buffer_wbl2 sc1 ahead of the atomic.
16166define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg(
16167; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16168; GFX6:       ; %bb.0: ; %entry
16169; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
16170; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
16171; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
16172; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
16173; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16174; GFX6-NEXT:    s_mov_b32 s12, s5
16175; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
16176; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
16177; GFX6-NEXT:    s_mov_b32 s11, -1
16178; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
16179; GFX6-NEXT:    s_mov_b32 s5, s12
16180; GFX6-NEXT:    s_mov_b32 s6, s11
16181; GFX6-NEXT:    s_mov_b32 s7, s10
16182; GFX6-NEXT:    v_mov_b32_e32 v0, s9
16183; GFX6-NEXT:    v_mov_b32_e32 v2, s8
16184; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16185; GFX6-NEXT:    v_mov_b32_e32 v1, v2
16186; GFX6-NEXT:    s_waitcnt vmcnt(0)
16187; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
16188; GFX6-NEXT:    s_waitcnt vmcnt(0)
16189; GFX6-NEXT:    buffer_wbinvl1
16190; GFX6-NEXT:    s_endpgm
16191;
16192; GFX7-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16193; GFX7:       ; %bb.0: ; %entry
16194; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
16195; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
16196; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
16197; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
16198; GFX7-NEXT:    s_mov_b64 s[10:11], 16
16199; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
16200; GFX7-NEXT:    s_mov_b32 s4, s8
16201; GFX7-NEXT:    s_mov_b32 s5, s9
16202; GFX7-NEXT:    s_mov_b32 s9, s10
16203; GFX7-NEXT:    s_mov_b32 s8, s11
16204; GFX7-NEXT:    s_add_u32 s4, s4, s9
16205; GFX7-NEXT:    s_addc_u32 s8, s5, s8
16206; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
16207; GFX7-NEXT:    s_mov_b32 s5, s8
16208; GFX7-NEXT:    v_mov_b32_e32 v2, s7
16209; GFX7-NEXT:    v_mov_b32_e32 v0, s6
16210; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16211; GFX7-NEXT:    v_mov_b32_e32 v3, v0
16212; GFX7-NEXT:    v_mov_b32_e32 v0, s4
16213; GFX7-NEXT:    v_mov_b32_e32 v1, s5
16214; GFX7-NEXT:    s_waitcnt vmcnt(0)
16215; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
16216; GFX7-NEXT:    s_waitcnt vmcnt(0)
16217; GFX7-NEXT:    buffer_wbinvl1_vol
16218; GFX7-NEXT:    s_endpgm
16219;
16220; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16221; GFX10-WGP:       ; %bb.0: ; %entry
16222; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
16223; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16224; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
16225; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
16226; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16227; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
16228; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
16229; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16230; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
16231; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
16232; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16233; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16234; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16235; GFX10-WGP-NEXT:    buffer_gl1_inv
16236; GFX10-WGP-NEXT:    buffer_gl0_inv
16237; GFX10-WGP-NEXT:    s_endpgm
16238;
16239; GFX10-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16240; GFX10-CU:       ; %bb.0: ; %entry
16241; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
16242; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16243; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
16244; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
16245; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
16246; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
16247; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
16248; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16249; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
16250; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
16251; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16252; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16253; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16254; GFX10-CU-NEXT:    buffer_gl1_inv
16255; GFX10-CU-NEXT:    buffer_gl0_inv
16256; GFX10-CU-NEXT:    s_endpgm
16257;
16258; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16259; SKIP-CACHE-INV:       ; %bb.0: ; %entry
16260; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
16261; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
16262; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
16263; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
16264; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
16265; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
16266; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
16267; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
16268; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
16269; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
16270; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
16271; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
16272; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
16273; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
16274; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
16275; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16276; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
16277; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16278; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
16279; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16280; SKIP-CACHE-INV-NEXT:    s_endpgm
16281;
16282; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16283; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
16284; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16285; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16286; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16287; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16288; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16289; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16290; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16291; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16292; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16293; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16294; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16295; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16296; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
16297; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
16298;
16299; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16300; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
16301; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16302; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16303; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16304; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16305; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16306; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16307; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16308; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16309; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16310; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16311; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16312; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16313; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
16314; GFX90A-TGSPLIT-NEXT:    s_endpgm
16315;
16316; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16317; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
16318; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16319; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16320; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16321; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16322; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16323; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16324; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16325; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16326; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16327; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
16328; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16329; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
16330; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16331; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
16332; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
16333;
16334; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16335; GFX940-TGSPLIT:       ; %bb.0: ; %entry
16336; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16337; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16338; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16339; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16340; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16341; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16342; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16343; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16344; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16345; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
16346; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16347; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
16348; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16349; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
16350; GFX940-TGSPLIT-NEXT:    s_endpgm
16351;
16352; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16353; GFX11-WGP:       ; %bb.0: ; %entry
16354; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
16355; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16356; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16357; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16358; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16359; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
16360; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
16361; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16362; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
16363; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
16364; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16365; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16366; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16367; GFX11-WGP-NEXT:    buffer_gl1_inv
16368; GFX11-WGP-NEXT:    buffer_gl0_inv
16369; GFX11-WGP-NEXT:    s_endpgm
16370;
16371; GFX11-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16372; GFX11-CU:       ; %bb.0: ; %entry
16373; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
16374; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16375; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16376; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16377; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
16378; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
16379; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
16380; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16381; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
16382; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
16383; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16384; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16385; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16386; GFX11-CU-NEXT:    buffer_gl1_inv
16387; GFX11-CU-NEXT:    buffer_gl0_inv
16388; GFX11-CU-NEXT:    s_endpgm
16389;
16390; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16391; GFX12-WGP:       ; %bb.0: ; %entry
16392; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
16393; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16394; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16395; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16396; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
16397; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
16398; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
16399; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16400; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
16401; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
16402; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
16403; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
16404; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16405; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
16406; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16407; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
16408; GFX12-WGP-NEXT:    s_endpgm
16409;
16410; GFX12-CU-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
16411; GFX12-CU:       ; %bb.0: ; %entry
16412; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
16413; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16414; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16415; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16416; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
16417; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
16418; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
16419; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16420; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
16421; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
16422; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
16423; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
16424; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16425; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
16426; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16427; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
16428; GFX12-CU-NEXT:    s_endpgm
16429    ptr addrspace(1) %out, i32 %in, i32 %old) {
16430entry:
  ; Address of the i32 element 4 past %out. This is the 16-byte displacement
  ; seen in the expected output, either as offset:16 on the atomic or as the
  ; explicit add of 16 ahead of flat_atomic_cmpswap in the gfx700 amdhsa group.
16431  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; The operation under test: compare against %old, store %in on a match.
  ; %val is not used afterwards, and the expected atomics do not request a
  ; returned value (none of them carries a glc / return modifier).
16432  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
16433  ret void
16434}
16435
16436define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg(
16437; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16438; GFX6:       ; %bb.0: ; %entry
16439; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
16440; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
16441; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
16442; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
16443; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16444; GFX6-NEXT:    s_mov_b32 s12, s5
16445; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
16446; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
16447; GFX6-NEXT:    s_mov_b32 s11, -1
16448; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
16449; GFX6-NEXT:    s_mov_b32 s5, s12
16450; GFX6-NEXT:    s_mov_b32 s6, s11
16451; GFX6-NEXT:    s_mov_b32 s7, s10
16452; GFX6-NEXT:    v_mov_b32_e32 v0, s9
16453; GFX6-NEXT:    v_mov_b32_e32 v2, s8
16454; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16455; GFX6-NEXT:    v_mov_b32_e32 v1, v2
16456; GFX6-NEXT:    s_waitcnt vmcnt(0)
16457; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
16458; GFX6-NEXT:    s_waitcnt vmcnt(0)
16459; GFX6-NEXT:    buffer_wbinvl1
16460; GFX6-NEXT:    s_endpgm
16461;
16462; GFX7-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16463; GFX7:       ; %bb.0: ; %entry
16464; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
16465; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
16466; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
16467; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
16468; GFX7-NEXT:    s_mov_b64 s[10:11], 16
16469; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
16470; GFX7-NEXT:    s_mov_b32 s4, s8
16471; GFX7-NEXT:    s_mov_b32 s5, s9
16472; GFX7-NEXT:    s_mov_b32 s9, s10
16473; GFX7-NEXT:    s_mov_b32 s8, s11
16474; GFX7-NEXT:    s_add_u32 s4, s4, s9
16475; GFX7-NEXT:    s_addc_u32 s8, s5, s8
16476; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
16477; GFX7-NEXT:    s_mov_b32 s5, s8
16478; GFX7-NEXT:    v_mov_b32_e32 v2, s7
16479; GFX7-NEXT:    v_mov_b32_e32 v0, s6
16480; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16481; GFX7-NEXT:    v_mov_b32_e32 v3, v0
16482; GFX7-NEXT:    v_mov_b32_e32 v0, s4
16483; GFX7-NEXT:    v_mov_b32_e32 v1, s5
16484; GFX7-NEXT:    s_waitcnt vmcnt(0)
16485; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
16486; GFX7-NEXT:    s_waitcnt vmcnt(0)
16487; GFX7-NEXT:    buffer_wbinvl1_vol
16488; GFX7-NEXT:    s_endpgm
16489;
16490; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16491; GFX10-WGP:       ; %bb.0: ; %entry
16492; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
16493; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16494; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
16495; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
16496; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16497; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
16498; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
16499; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16500; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
16501; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
16502; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16503; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16504; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16505; GFX10-WGP-NEXT:    buffer_gl1_inv
16506; GFX10-WGP-NEXT:    buffer_gl0_inv
16507; GFX10-WGP-NEXT:    s_endpgm
16508;
16509; GFX10-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16510; GFX10-CU:       ; %bb.0: ; %entry
16511; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
16512; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16513; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
16514; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
16515; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
16516; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
16517; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
16518; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16519; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
16520; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
16521; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16522; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16523; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16524; GFX10-CU-NEXT:    buffer_gl1_inv
16525; GFX10-CU-NEXT:    buffer_gl0_inv
16526; GFX10-CU-NEXT:    s_endpgm
16527;
16528; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16529; SKIP-CACHE-INV:       ; %bb.0: ; %entry
16530; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
16531; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
16532; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
16533; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
16534; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
16535; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
16536; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
16537; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
16538; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
16539; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
16540; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
16541; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
16542; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
16543; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
16544; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
16545; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16546; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
16547; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16548; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
16549; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16550; SKIP-CACHE-INV-NEXT:    s_endpgm
16551;
16552; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16553; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
16554; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16555; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16556; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16557; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16558; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16559; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16560; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16561; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16562; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16563; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16564; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16565; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16566; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
16567; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
16568;
16569; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16570; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
16571; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16572; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16573; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16574; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16575; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16576; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16577; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16578; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16579; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16580; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16581; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16582; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16583; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
16584; GFX90A-TGSPLIT-NEXT:    s_endpgm
16585;
16586; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16587; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
16588; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16589; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16590; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16591; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16592; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16593; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16594; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16595; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16596; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16597; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
16598; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16599; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
16600; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16601; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
16602; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
16603;
16604; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16605; GFX940-TGSPLIT:       ; %bb.0: ; %entry
16606; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16607; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16608; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16609; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16610; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16611; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16612; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16613; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16614; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16615; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
16616; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16617; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
16618; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16619; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
16620; GFX940-TGSPLIT-NEXT:    s_endpgm
16621;
16622; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16623; GFX11-WGP:       ; %bb.0: ; %entry
16624; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
16625; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16626; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16627; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16628; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16629; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
16630; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
16631; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16632; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
16633; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
16634; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16635; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16636; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16637; GFX11-WGP-NEXT:    buffer_gl1_inv
16638; GFX11-WGP-NEXT:    buffer_gl0_inv
16639; GFX11-WGP-NEXT:    s_endpgm
16640;
16641; GFX11-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16642; GFX11-CU:       ; %bb.0: ; %entry
16643; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
16644; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16645; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16646; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16647; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
16648; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
16649; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
16650; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16651; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
16652; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
16653; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16654; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16655; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16656; GFX11-CU-NEXT:    buffer_gl1_inv
16657; GFX11-CU-NEXT:    buffer_gl0_inv
16658; GFX11-CU-NEXT:    s_endpgm
16659;
16660; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16661; GFX12-WGP:       ; %bb.0: ; %entry
16662; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
16663; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16664; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16665; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16666; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
16667; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
16668; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
16669; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16670; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
16671; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
16672; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
16673; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
16674; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16675; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
16676; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16677; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
16678; GFX12-WGP-NEXT:    s_endpgm
16679;
16680; GFX12-CU-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
16681; GFX12-CU:       ; %bb.0: ; %entry
16682; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
16683; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16684; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16685; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16686; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
16687; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
16688; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
16689; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16690; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
16691; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
16692; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
16693; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
16694; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16695; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
16696; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16697; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
16698; GFX12-CU-NEXT:    s_endpgm
16699    ptr addrspace(1) %out, i32 %in, i32 %old) {
16700entry:
16701  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
16702  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
16703  ret void
16704}
16705
; Test: volatile i32 cmpxchg on global memory (addrspace(1)) at %out + 16 bytes
; with syncscope("agent-one-as"), success ordering monotonic and failure
; ordering seq_cst.
; The checks below are autogenerated (see the NOTE at the top of the file). For
; each RUN prefix they pin the waits emitted around the compare-and-swap and the
; cache-invalidate instruction(s) that follow it. The SKIP-CACHE-INV prefix
; expects no invalidate after the atomic, and the GFX940 prefixes additionally
; expect a "buffer_wbl2 sc1" before the atomic.
16706define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg(
16707; GFX6-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16708; GFX6:       ; %bb.0: ; %entry
16709; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
16710; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
16711; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
16712; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
16713; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16714; GFX6-NEXT:    s_mov_b32 s12, s5
16715; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
16716; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
16717; GFX6-NEXT:    s_mov_b32 s11, -1
16718; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
16719; GFX6-NEXT:    s_mov_b32 s5, s12
16720; GFX6-NEXT:    s_mov_b32 s6, s11
16721; GFX6-NEXT:    s_mov_b32 s7, s10
16722; GFX6-NEXT:    v_mov_b32_e32 v0, s9
16723; GFX6-NEXT:    v_mov_b32_e32 v2, s8
16724; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16725; GFX6-NEXT:    v_mov_b32_e32 v1, v2
16726; GFX6-NEXT:    s_waitcnt vmcnt(0)
16727; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
16728; GFX6-NEXT:    s_waitcnt vmcnt(0)
16729; GFX6-NEXT:    buffer_wbinvl1
16730; GFX6-NEXT:    s_endpgm
16731;
16732; GFX7-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16733; GFX7:       ; %bb.0: ; %entry
16734; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
16735; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
16736; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
16737; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
16738; GFX7-NEXT:    s_mov_b64 s[10:11], 16
16739; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
16740; GFX7-NEXT:    s_mov_b32 s4, s8
16741; GFX7-NEXT:    s_mov_b32 s5, s9
16742; GFX7-NEXT:    s_mov_b32 s9, s10
16743; GFX7-NEXT:    s_mov_b32 s8, s11
16744; GFX7-NEXT:    s_add_u32 s4, s4, s9
16745; GFX7-NEXT:    s_addc_u32 s8, s5, s8
16746; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
16747; GFX7-NEXT:    s_mov_b32 s5, s8
16748; GFX7-NEXT:    v_mov_b32_e32 v2, s7
16749; GFX7-NEXT:    v_mov_b32_e32 v0, s6
16750; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16751; GFX7-NEXT:    v_mov_b32_e32 v3, v0
16752; GFX7-NEXT:    v_mov_b32_e32 v0, s4
16753; GFX7-NEXT:    v_mov_b32_e32 v1, s5
16754; GFX7-NEXT:    s_waitcnt vmcnt(0)
16755; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
16756; GFX7-NEXT:    s_waitcnt vmcnt(0)
16757; GFX7-NEXT:    buffer_wbinvl1_vol
16758; GFX7-NEXT:    s_endpgm
16759;
16760; GFX10-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16761; GFX10-WGP:       ; %bb.0: ; %entry
16762; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
16763; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16764; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
16765; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
16766; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16767; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
16768; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
16769; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16770; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
16771; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
16772; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16773; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16774; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16775; GFX10-WGP-NEXT:    buffer_gl1_inv
16776; GFX10-WGP-NEXT:    buffer_gl0_inv
16777; GFX10-WGP-NEXT:    s_endpgm
16778;
16779; GFX10-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16780; GFX10-CU:       ; %bb.0: ; %entry
16781; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
16782; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16783; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
16784; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
16785; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
16786; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
16787; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
16788; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16789; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
16790; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
16791; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16792; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16793; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16794; GFX10-CU-NEXT:    buffer_gl1_inv
16795; GFX10-CU-NEXT:    buffer_gl0_inv
16796; GFX10-CU-NEXT:    s_endpgm
16797;
16798; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16799; SKIP-CACHE-INV:       ; %bb.0: ; %entry
16800; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
16801; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
16802; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
16803; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
16804; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
16805; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
16806; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
16807; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
16808; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
16809; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
16810; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
16811; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
16812; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
16813; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
16814; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
16815; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16816; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
16817; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16818; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
16819; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16820; SKIP-CACHE-INV-NEXT:    s_endpgm
16821;
16822; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16823; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
16824; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16825; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16826; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16827; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16828; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16829; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16830; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16831; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16832; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16833; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16834; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16835; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16836; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
16837; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
16838;
16839; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16840; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
16841; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16842; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16843; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16844; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16845; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16846; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16847; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16848; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16849; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16850; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16851; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16852; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16853; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
16854; GFX90A-TGSPLIT-NEXT:    s_endpgm
16855;
16856; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16857; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
16858; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16859; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16860; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16861; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16862; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16863; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16864; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16865; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16866; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16867; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
16868; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16869; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
16870; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16871; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
16872; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
16873;
16874; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16875; GFX940-TGSPLIT:       ; %bb.0: ; %entry
16876; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16877; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16878; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16879; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16880; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16881; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16882; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16883; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16884; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16885; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
16886; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16887; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
16888; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16889; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
16890; GFX940-TGSPLIT-NEXT:    s_endpgm
16891;
16892; GFX11-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16893; GFX11-WGP:       ; %bb.0: ; %entry
16894; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
16895; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16896; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16897; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16898; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16899; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
16900; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
16901; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16902; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
16903; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
16904; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16905; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16906; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16907; GFX11-WGP-NEXT:    buffer_gl1_inv
16908; GFX11-WGP-NEXT:    buffer_gl0_inv
16909; GFX11-WGP-NEXT:    s_endpgm
16910;
16911; GFX11-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16912; GFX11-CU:       ; %bb.0: ; %entry
16913; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
16914; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16915; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16916; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16917; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
16918; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
16919; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
16920; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16921; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
16922; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
16923; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16924; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16925; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16926; GFX11-CU-NEXT:    buffer_gl1_inv
16927; GFX11-CU-NEXT:    buffer_gl0_inv
16928; GFX11-CU-NEXT:    s_endpgm
16929;
16930; GFX12-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16931; GFX12-WGP:       ; %bb.0: ; %entry
16932; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
16933; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16934; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16935; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16936; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
16937; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
16938; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
16939; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16940; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
16941; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
16942; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
16943; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
16944; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16945; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
16946; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16947; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
16948; GFX12-WGP-NEXT:    s_endpgm
16949;
16950; GFX12-CU-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
16951; GFX12-CU:       ; %bb.0: ; %entry
16952; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
16953; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16954; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16955; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16956; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
16957; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
16958; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
16959; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16960; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
16961; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
16962; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
16963; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
16964; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16965; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
16966; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16967; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
16968; GFX12-CU-NEXT:    s_endpgm
16969    ptr addrspace(1) %out, i32 %in, i32 %old) {
16970entry:
; i32 element index 4 is the 16-byte offset that appears in the checks above.
16971  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare against %old and store %in on a match; the result %val is unused.
16972  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst
16973  ret void
16974}
16975
; Test: volatile i32 cmpxchg on global memory (addrspace(1)) at %out + 16 bytes
; with syncscope("agent-one-as"), success ordering acquire and failure
; ordering seq_cst.
; The checks below are autogenerated (see the NOTE at the top of the file). For
; each RUN prefix they pin the waits emitted around the compare-and-swap and the
; cache-invalidate instruction(s) that follow it. The SKIP-CACHE-INV prefix
; expects no invalidate after the atomic, and the GFX940 prefixes additionally
; expect a "buffer_wbl2 sc1" before the atomic.
16976define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg(
16977; GFX6-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
16978; GFX6:       ; %bb.0: ; %entry
16979; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
16980; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
16981; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
16982; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
16983; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16984; GFX6-NEXT:    s_mov_b32 s12, s5
16985; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
16986; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
16987; GFX6-NEXT:    s_mov_b32 s11, -1
16988; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
16989; GFX6-NEXT:    s_mov_b32 s5, s12
16990; GFX6-NEXT:    s_mov_b32 s6, s11
16991; GFX6-NEXT:    s_mov_b32 s7, s10
16992; GFX6-NEXT:    v_mov_b32_e32 v0, s9
16993; GFX6-NEXT:    v_mov_b32_e32 v2, s8
16994; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16995; GFX6-NEXT:    v_mov_b32_e32 v1, v2
16996; GFX6-NEXT:    s_waitcnt vmcnt(0)
16997; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
16998; GFX6-NEXT:    s_waitcnt vmcnt(0)
16999; GFX6-NEXT:    buffer_wbinvl1
17000; GFX6-NEXT:    s_endpgm
17001;
17002; GFX7-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17003; GFX7:       ; %bb.0: ; %entry
17004; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
17005; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
17006; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
17007; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
17008; GFX7-NEXT:    s_mov_b64 s[10:11], 16
17009; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
17010; GFX7-NEXT:    s_mov_b32 s4, s8
17011; GFX7-NEXT:    s_mov_b32 s5, s9
17012; GFX7-NEXT:    s_mov_b32 s9, s10
17013; GFX7-NEXT:    s_mov_b32 s8, s11
17014; GFX7-NEXT:    s_add_u32 s4, s4, s9
17015; GFX7-NEXT:    s_addc_u32 s8, s5, s8
17016; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
17017; GFX7-NEXT:    s_mov_b32 s5, s8
17018; GFX7-NEXT:    v_mov_b32_e32 v2, s7
17019; GFX7-NEXT:    v_mov_b32_e32 v0, s6
17020; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17021; GFX7-NEXT:    v_mov_b32_e32 v3, v0
17022; GFX7-NEXT:    v_mov_b32_e32 v0, s4
17023; GFX7-NEXT:    v_mov_b32_e32 v1, s5
17024; GFX7-NEXT:    s_waitcnt vmcnt(0)
17025; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
17026; GFX7-NEXT:    s_waitcnt vmcnt(0)
17027; GFX7-NEXT:    buffer_wbinvl1_vol
17028; GFX7-NEXT:    s_endpgm
17029;
17030; GFX10-WGP-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17031; GFX10-WGP:       ; %bb.0: ; %entry
17032; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
17033; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17034; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
17035; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
17036; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17037; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
17038; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
17039; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17040; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
17041; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17042; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17043; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
17044; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17045; GFX10-WGP-NEXT:    buffer_gl1_inv
17046; GFX10-WGP-NEXT:    buffer_gl0_inv
17047; GFX10-WGP-NEXT:    s_endpgm
17048;
17049; GFX10-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17050; GFX10-CU:       ; %bb.0: ; %entry
17051; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
17052; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17053; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
17054; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
17055; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
17056; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
17057; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
17058; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17059; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
17060; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17061; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17062; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
17063; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17064; GFX10-CU-NEXT:    buffer_gl1_inv
17065; GFX10-CU-NEXT:    buffer_gl0_inv
17066; GFX10-CU-NEXT:    s_endpgm
17067;
17068; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17069; SKIP-CACHE-INV:       ; %bb.0: ; %entry
17070; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
17071; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
17072; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
17073; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
17074; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
17075; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
17076; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
17077; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
17078; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
17079; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
17080; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
17081; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
17082; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
17083; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
17084; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
17085; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17086; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
17087; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17088; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
17089; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17090; SKIP-CACHE-INV-NEXT:    s_endpgm
17091;
17092; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17093; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
17094; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17095; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17096; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17097; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17098; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17099; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17100; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17101; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17102; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17103; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17104; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
17105; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17106; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
17107; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
17108;
17109; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17110; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
17111; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17112; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17113; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17114; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17115; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17116; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17117; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17118; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17119; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17120; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17121; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
17122; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17123; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
17124; GFX90A-TGSPLIT-NEXT:    s_endpgm
17125;
17126; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17127; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
17128; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17129; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17130; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17131; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17132; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17133; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17134; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17135; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17136; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17137; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
17138; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17139; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
17140; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17141; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
17142; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
17143;
17144; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17145; GFX940-TGSPLIT:       ; %bb.0: ; %entry
17146; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17147; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17148; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17149; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17150; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17151; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17152; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17153; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17154; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17155; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
17156; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17157; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
17158; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17159; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
17160; GFX940-TGSPLIT-NEXT:    s_endpgm
17161;
17162; GFX11-WGP-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17163; GFX11-WGP:       ; %bb.0: ; %entry
17164; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
17165; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17166; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17167; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17168; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17169; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
17170; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
17171; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17172; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
17173; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17174; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17175; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
17176; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17177; GFX11-WGP-NEXT:    buffer_gl1_inv
17178; GFX11-WGP-NEXT:    buffer_gl0_inv
17179; GFX11-WGP-NEXT:    s_endpgm
17180;
17181; GFX11-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17182; GFX11-CU:       ; %bb.0: ; %entry
17183; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
17184; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17185; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17186; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17187; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
17188; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
17189; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
17190; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17191; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
17192; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
17193; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17194; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
17195; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17196; GFX11-CU-NEXT:    buffer_gl1_inv
17197; GFX11-CU-NEXT:    buffer_gl0_inv
17198; GFX11-CU-NEXT:    s_endpgm
17199;
17200; GFX12-WGP-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17201; GFX12-WGP:       ; %bb.0: ; %entry
17202; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
17203; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17204; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17205; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17206; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
17207; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
17208; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
17209; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17210; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
17211; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
17212; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
17213; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
17214; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
17215; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
17216; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
17217; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
17218; GFX12-WGP-NEXT:    s_endpgm
17219;
17220; GFX12-CU-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
17221; GFX12-CU:       ; %bb.0: ; %entry
17222; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
17223; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17224; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17225; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17226; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
17227; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
17228; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
17229; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17230; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
17231; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
17232; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
17233; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
17234; GFX12-CU-NEXT:    s_wait_storecnt 0x0
17235; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
17236; GFX12-CU-NEXT:    s_wait_storecnt 0x0
17237; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
17238; GFX12-CU-NEXT:    s_endpgm
17239    ptr addrspace(1) %out, i32 %in, i32 %old) {
17240entry:
; i32 element index 4 is the 16-byte offset that appears in the checks above.
17241  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare against %old and store %in on a match; the result %val is unused.
17242  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst
17243  ret void
17244}
17245
; Kernel under test: a volatile i32 cmpxchg on a global (addrspace(1)) pointer
; with syncscope("agent-one-as"), success ordering release and failure
; ordering seq_cst. The autogenerated assertions below pin, for each RUN
; configuration, the wait and cache-maintenance instructions emitted around
; the atomic compare-and-swap.
17246define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg(
17247; GFX6-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17248; GFX6:       ; %bb.0: ; %entry
17249; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
17250; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17251; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
17252; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
17253; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
17254; GFX6-NEXT:    s_mov_b32 s12, s5
17255; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
17256; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
17257; GFX6-NEXT:    s_mov_b32 s11, -1
17258; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
17259; GFX6-NEXT:    s_mov_b32 s5, s12
17260; GFX6-NEXT:    s_mov_b32 s6, s11
17261; GFX6-NEXT:    s_mov_b32 s7, s10
17262; GFX6-NEXT:    v_mov_b32_e32 v0, s9
17263; GFX6-NEXT:    v_mov_b32_e32 v2, s8
17264; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17265; GFX6-NEXT:    v_mov_b32_e32 v1, v2
17266; GFX6-NEXT:    s_waitcnt vmcnt(0)
17267; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
17268; GFX6-NEXT:    s_waitcnt vmcnt(0)
17269; GFX6-NEXT:    buffer_wbinvl1
17270; GFX6-NEXT:    s_endpgm
17271;
17272; GFX7-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17273; GFX7:       ; %bb.0: ; %entry
17274; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
17275; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
17276; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
17277; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
17278; GFX7-NEXT:    s_mov_b64 s[10:11], 16
17279; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
17280; GFX7-NEXT:    s_mov_b32 s4, s8
17281; GFX7-NEXT:    s_mov_b32 s5, s9
17282; GFX7-NEXT:    s_mov_b32 s9, s10
17283; GFX7-NEXT:    s_mov_b32 s8, s11
17284; GFX7-NEXT:    s_add_u32 s4, s4, s9
17285; GFX7-NEXT:    s_addc_u32 s8, s5, s8
17286; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
17287; GFX7-NEXT:    s_mov_b32 s5, s8
17288; GFX7-NEXT:    v_mov_b32_e32 v2, s7
17289; GFX7-NEXT:    v_mov_b32_e32 v0, s6
17290; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17291; GFX7-NEXT:    v_mov_b32_e32 v3, v0
17292; GFX7-NEXT:    v_mov_b32_e32 v0, s4
17293; GFX7-NEXT:    v_mov_b32_e32 v1, s5
17294; GFX7-NEXT:    s_waitcnt vmcnt(0)
17295; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
17296; GFX7-NEXT:    s_waitcnt vmcnt(0)
17297; GFX7-NEXT:    buffer_wbinvl1_vol
17298; GFX7-NEXT:    s_endpgm
17299;
17300; GFX10-WGP-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17301; GFX10-WGP:       ; %bb.0: ; %entry
17302; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
17303; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17304; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
17305; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
17306; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17307; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
17308; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
17309; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17310; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
17311; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17312; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17313; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
17314; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17315; GFX10-WGP-NEXT:    buffer_gl1_inv
17316; GFX10-WGP-NEXT:    buffer_gl0_inv
17317; GFX10-WGP-NEXT:    s_endpgm
17318;
17319; GFX10-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17320; GFX10-CU:       ; %bb.0: ; %entry
17321; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
17322; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17323; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
17324; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
17325; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
17326; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
17327; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
17328; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17329; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
17330; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17331; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17332; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
17333; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17334; GFX10-CU-NEXT:    buffer_gl1_inv
17335; GFX10-CU-NEXT:    buffer_gl0_inv
17336; GFX10-CU-NEXT:    s_endpgm
17337;
17338; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17339; SKIP-CACHE-INV:       ; %bb.0: ; %entry
17340; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
17341; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
17342; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
17343; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
17344; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
17345; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
17346; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
17347; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
17348; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
17349; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
17350; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
17351; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
17352; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
17353; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
17354; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
17355; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17356; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
17357; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17358; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
17359; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17360; SKIP-CACHE-INV-NEXT:    s_endpgm
17361;
17362; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17363; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
17364; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17365; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17366; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17367; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17368; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17369; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17370; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17371; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17372; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17373; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17374; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
17375; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17376; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
17377; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
17378;
17379; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17380; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
17381; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17382; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17383; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17384; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17385; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17386; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17387; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17388; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17389; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17390; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17391; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
17392; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17393; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
17394; GFX90A-TGSPLIT-NEXT:    s_endpgm
17395;
17396; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17397; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
17398; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17399; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17400; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17401; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17402; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17403; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17404; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17405; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17406; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17407; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
17408; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17409; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
17410; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17411; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
17412; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
17413;
17414; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17415; GFX940-TGSPLIT:       ; %bb.0: ; %entry
17416; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17417; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17418; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17419; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17420; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17421; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17422; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17423; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17424; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17425; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
17426; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17427; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
17428; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17429; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
17430; GFX940-TGSPLIT-NEXT:    s_endpgm
17431;
17432; GFX11-WGP-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17433; GFX11-WGP:       ; %bb.0: ; %entry
17434; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
17435; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17436; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17437; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17438; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17439; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
17440; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
17441; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17442; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
17443; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17444; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17445; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
17446; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17447; GFX11-WGP-NEXT:    buffer_gl1_inv
17448; GFX11-WGP-NEXT:    buffer_gl0_inv
17449; GFX11-WGP-NEXT:    s_endpgm
17450;
17451; GFX11-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17452; GFX11-CU:       ; %bb.0: ; %entry
17453; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
17454; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17455; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17456; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17457; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
17458; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
17459; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
17460; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17461; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
17462; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
17463; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17464; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
17465; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17466; GFX11-CU-NEXT:    buffer_gl1_inv
17467; GFX11-CU-NEXT:    buffer_gl0_inv
17468; GFX11-CU-NEXT:    s_endpgm
17469;
17470; GFX12-WGP-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17471; GFX12-WGP:       ; %bb.0: ; %entry
17472; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
17473; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17474; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17475; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17476; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
17477; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
17478; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
17479; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17480; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
17481; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
17482; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
17483; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
17484; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
17485; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
17486; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
17487; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
17488; GFX12-WGP-NEXT:    s_endpgm
17489;
17490; GFX12-CU-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
17491; GFX12-CU:       ; %bb.0: ; %entry
17492; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
17493; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17494; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17495; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17496; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
17497; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
17498; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
17499; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17500; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
17501; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
17502; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
17503; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
17504; GFX12-CU-NEXT:    s_wait_storecnt 0x0
17505; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
17506; GFX12-CU-NEXT:    s_wait_storecnt 0x0
17507; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
17508; GFX12-CU-NEXT:    s_endpgm
17509    ptr addrspace(1) %out, i32 %in, i32 %old) {
17510entry:
  ; Address the fifth i32 of %out (index 4); this is the "offset:16" (or the
  ; constant 16 added to the base pointer) seen in the checked instructions.
17511  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and store %in on a match. %val is never read; only
  ; the code emitted around the atomic is being checked.
17512  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst
17513  ret void
17514}
17515
; Kernel under test: a volatile i32 cmpxchg on a global (addrspace(1)) pointer
; with syncscope("agent-one-as"), success ordering acq_rel and failure
; ordering seq_cst. The autogenerated assertions below pin, for each RUN
; configuration, the wait and cache-maintenance instructions emitted around
; the atomic compare-and-swap.
17516define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg(
17517; GFX6-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17518; GFX6:       ; %bb.0: ; %entry
17519; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
17520; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17521; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
17522; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
17523; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
17524; GFX6-NEXT:    s_mov_b32 s12, s5
17525; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
17526; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
17527; GFX6-NEXT:    s_mov_b32 s11, -1
17528; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
17529; GFX6-NEXT:    s_mov_b32 s5, s12
17530; GFX6-NEXT:    s_mov_b32 s6, s11
17531; GFX6-NEXT:    s_mov_b32 s7, s10
17532; GFX6-NEXT:    v_mov_b32_e32 v0, s9
17533; GFX6-NEXT:    v_mov_b32_e32 v2, s8
17534; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17535; GFX6-NEXT:    v_mov_b32_e32 v1, v2
17536; GFX6-NEXT:    s_waitcnt vmcnt(0)
17537; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
17538; GFX6-NEXT:    s_waitcnt vmcnt(0)
17539; GFX6-NEXT:    buffer_wbinvl1
17540; GFX6-NEXT:    s_endpgm
17541;
17542; GFX7-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17543; GFX7:       ; %bb.0: ; %entry
17544; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
17545; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
17546; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
17547; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
17548; GFX7-NEXT:    s_mov_b64 s[10:11], 16
17549; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
17550; GFX7-NEXT:    s_mov_b32 s4, s8
17551; GFX7-NEXT:    s_mov_b32 s5, s9
17552; GFX7-NEXT:    s_mov_b32 s9, s10
17553; GFX7-NEXT:    s_mov_b32 s8, s11
17554; GFX7-NEXT:    s_add_u32 s4, s4, s9
17555; GFX7-NEXT:    s_addc_u32 s8, s5, s8
17556; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
17557; GFX7-NEXT:    s_mov_b32 s5, s8
17558; GFX7-NEXT:    v_mov_b32_e32 v2, s7
17559; GFX7-NEXT:    v_mov_b32_e32 v0, s6
17560; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17561; GFX7-NEXT:    v_mov_b32_e32 v3, v0
17562; GFX7-NEXT:    v_mov_b32_e32 v0, s4
17563; GFX7-NEXT:    v_mov_b32_e32 v1, s5
17564; GFX7-NEXT:    s_waitcnt vmcnt(0)
17565; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
17566; GFX7-NEXT:    s_waitcnt vmcnt(0)
17567; GFX7-NEXT:    buffer_wbinvl1_vol
17568; GFX7-NEXT:    s_endpgm
17569;
17570; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17571; GFX10-WGP:       ; %bb.0: ; %entry
17572; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
17573; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17574; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
17575; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
17576; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17577; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
17578; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
17579; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17580; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
17581; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17582; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17583; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
17584; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17585; GFX10-WGP-NEXT:    buffer_gl1_inv
17586; GFX10-WGP-NEXT:    buffer_gl0_inv
17587; GFX10-WGP-NEXT:    s_endpgm
17588;
17589; GFX10-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17590; GFX10-CU:       ; %bb.0: ; %entry
17591; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
17592; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17593; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
17594; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
17595; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
17596; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
17597; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
17598; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17599; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
17600; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17601; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17602; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
17603; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17604; GFX10-CU-NEXT:    buffer_gl1_inv
17605; GFX10-CU-NEXT:    buffer_gl0_inv
17606; GFX10-CU-NEXT:    s_endpgm
17607;
17608; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17609; SKIP-CACHE-INV:       ; %bb.0: ; %entry
17610; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
17611; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
17612; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
17613; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
17614; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
17615; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
17616; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
17617; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
17618; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
17619; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
17620; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
17621; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
17622; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
17623; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
17624; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
17625; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17626; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
17627; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17628; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
17629; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17630; SKIP-CACHE-INV-NEXT:    s_endpgm
17631;
17632; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17633; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
17634; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17635; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17636; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17637; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17638; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17639; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17640; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17641; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17642; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17643; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17644; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
17645; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17646; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
17647; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
17648;
17649; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17650; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
17651; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17652; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17653; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17654; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17655; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17656; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17657; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17658; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17659; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17660; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17661; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
17662; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17663; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
17664; GFX90A-TGSPLIT-NEXT:    s_endpgm
17665;
17666; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17667; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
17668; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17669; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17670; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17671; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17672; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17673; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17674; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17675; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17676; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17677; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
17678; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17679; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
17680; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17681; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
17682; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
17683;
17684; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17685; GFX940-TGSPLIT:       ; %bb.0: ; %entry
17686; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17687; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17688; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17689; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17690; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17691; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17692; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17693; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17694; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17695; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
17696; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17697; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
17698; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17699; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
17700; GFX940-TGSPLIT-NEXT:    s_endpgm
17701;
17702; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17703; GFX11-WGP:       ; %bb.0: ; %entry
17704; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
17705; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17706; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17707; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17708; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17709; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
17710; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
17711; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17712; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
17713; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17714; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17715; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
17716; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17717; GFX11-WGP-NEXT:    buffer_gl1_inv
17718; GFX11-WGP-NEXT:    buffer_gl0_inv
17719; GFX11-WGP-NEXT:    s_endpgm
17720;
17721; GFX11-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17722; GFX11-CU:       ; %bb.0: ; %entry
17723; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
17724; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17725; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17726; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17727; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
17728; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
17729; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
17730; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17731; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
17732; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
17733; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17734; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
17735; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17736; GFX11-CU-NEXT:    buffer_gl1_inv
17737; GFX11-CU-NEXT:    buffer_gl0_inv
17738; GFX11-CU-NEXT:    s_endpgm
17739;
17740; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17741; GFX12-WGP:       ; %bb.0: ; %entry
17742; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
17743; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17744; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17745; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17746; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
17747; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
17748; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
17749; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17750; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
17751; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
17752; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
17753; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
17754; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
17755; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
17756; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
17757; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
17758; GFX12-WGP-NEXT:    s_endpgm
17759;
17760; GFX12-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
17761; GFX12-CU:       ; %bb.0: ; %entry
17762; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
17763; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17764; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17765; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17766; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
17767; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
17768; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
17769; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17770; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
17771; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
17772; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
17773; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
17774; GFX12-CU-NEXT:    s_wait_storecnt 0x0
17775; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
17776; GFX12-CU-NEXT:    s_wait_storecnt 0x0
17777; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
17778; GFX12-CU-NEXT:    s_endpgm
17779    ptr addrspace(1) %out, i32 %in, i32 %old) {
17780entry:
  ; Address the fifth i32 of %out (index 4); this is the "offset:16" (or the
  ; constant 16 added to the base pointer) seen in the checked instructions.
17781  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and store %in on a match. %val is never read; only
  ; the code emitted around the atomic is being checked.
17782  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst
17783  ret void
17784}
17785
17786define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg(
17787; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17788; GFX6:       ; %bb.0: ; %entry
17789; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
17790; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17791; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
17792; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
17793; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
17794; GFX6-NEXT:    s_mov_b32 s12, s5
17795; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
17796; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
17797; GFX6-NEXT:    s_mov_b32 s11, -1
17798; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
17799; GFX6-NEXT:    s_mov_b32 s5, s12
17800; GFX6-NEXT:    s_mov_b32 s6, s11
17801; GFX6-NEXT:    s_mov_b32 s7, s10
17802; GFX6-NEXT:    v_mov_b32_e32 v0, s9
17803; GFX6-NEXT:    v_mov_b32_e32 v2, s8
17804; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17805; GFX6-NEXT:    v_mov_b32_e32 v1, v2
17806; GFX6-NEXT:    s_waitcnt vmcnt(0)
17807; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
17808; GFX6-NEXT:    s_waitcnt vmcnt(0)
17809; GFX6-NEXT:    buffer_wbinvl1
17810; GFX6-NEXT:    s_endpgm
17811;
17812; GFX7-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17813; GFX7:       ; %bb.0: ; %entry
17814; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
17815; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
17816; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
17817; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
17818; GFX7-NEXT:    s_mov_b64 s[10:11], 16
17819; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
17820; GFX7-NEXT:    s_mov_b32 s4, s8
17821; GFX7-NEXT:    s_mov_b32 s5, s9
17822; GFX7-NEXT:    s_mov_b32 s9, s10
17823; GFX7-NEXT:    s_mov_b32 s8, s11
17824; GFX7-NEXT:    s_add_u32 s4, s4, s9
17825; GFX7-NEXT:    s_addc_u32 s8, s5, s8
17826; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
17827; GFX7-NEXT:    s_mov_b32 s5, s8
17828; GFX7-NEXT:    v_mov_b32_e32 v2, s7
17829; GFX7-NEXT:    v_mov_b32_e32 v0, s6
17830; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17831; GFX7-NEXT:    v_mov_b32_e32 v3, v0
17832; GFX7-NEXT:    v_mov_b32_e32 v0, s4
17833; GFX7-NEXT:    v_mov_b32_e32 v1, s5
17834; GFX7-NEXT:    s_waitcnt vmcnt(0)
17835; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
17836; GFX7-NEXT:    s_waitcnt vmcnt(0)
17837; GFX7-NEXT:    buffer_wbinvl1_vol
17838; GFX7-NEXT:    s_endpgm
17839;
17840; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17841; GFX10-WGP:       ; %bb.0: ; %entry
17842; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
17843; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17844; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
17845; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
17846; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17847; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
17848; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
17849; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17850; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
17851; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17852; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17853; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
17854; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17855; GFX10-WGP-NEXT:    buffer_gl1_inv
17856; GFX10-WGP-NEXT:    buffer_gl0_inv
17857; GFX10-WGP-NEXT:    s_endpgm
17858;
17859; GFX10-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17860; GFX10-CU:       ; %bb.0: ; %entry
17861; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
17862; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17863; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
17864; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
17865; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
17866; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
17867; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
17868; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17869; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
17870; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17871; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17872; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
17873; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17874; GFX10-CU-NEXT:    buffer_gl1_inv
17875; GFX10-CU-NEXT:    buffer_gl0_inv
17876; GFX10-CU-NEXT:    s_endpgm
17877;
17878; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17879; SKIP-CACHE-INV:       ; %bb.0: ; %entry
17880; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
17881; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
17882; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
17883; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
17884; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
17885; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
17886; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
17887; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
17888; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
17889; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
17890; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
17891; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
17892; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
17893; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
17894; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
17895; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17896; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
17897; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17898; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
17899; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17900; SKIP-CACHE-INV-NEXT:    s_endpgm
17901;
17902; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17903; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
17904; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17905; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17906; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17907; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17908; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17909; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17910; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17911; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17912; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17913; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17914; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
17915; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17916; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
17917; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
17918;
17919; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17920; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
17921; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17922; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17923; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17924; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17925; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17926; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17927; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17928; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17929; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17930; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17931; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
17932; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17933; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
17934; GFX90A-TGSPLIT-NEXT:    s_endpgm
17935;
17936; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17937; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
17938; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17939; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17940; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17941; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17942; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17943; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17944; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17945; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17946; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17947; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
17948; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17949; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
17950; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17951; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
17952; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
17953;
17954; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17955; GFX940-TGSPLIT:       ; %bb.0: ; %entry
17956; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17957; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17958; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17959; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17960; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17961; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17962; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17963; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17964; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17965; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
17966; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17967; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16
17968; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17969; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
17970; GFX940-TGSPLIT-NEXT:    s_endpgm
17971;
17972; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17973; GFX11-WGP:       ; %bb.0: ; %entry
17974; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
17975; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17976; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17977; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17978; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17979; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
17980; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
17981; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17982; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
17983; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17984; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17985; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
17986; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17987; GFX11-WGP-NEXT:    buffer_gl1_inv
17988; GFX11-WGP-NEXT:    buffer_gl0_inv
17989; GFX11-WGP-NEXT:    s_endpgm
17990;
17991; GFX11-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
17992; GFX11-CU:       ; %bb.0: ; %entry
17993; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
17994; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17995; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17996; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17997; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
17998; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
17999; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
18000; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18001; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
18002; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18003; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18004; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
18005; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18006; GFX11-CU-NEXT:    buffer_gl1_inv
18007; GFX11-CU-NEXT:    buffer_gl0_inv
18008; GFX11-CU-NEXT:    s_endpgm
18009;
18010; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
18011; GFX12-WGP:       ; %bb.0: ; %entry
18012; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
18013; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18014; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18015; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18016; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
18017; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
18018; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
18019; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18020; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
18021; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
18022; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
18023; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18024; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
18025; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
18026; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
18027; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
18028; GFX12-WGP-NEXT:    s_endpgm
18029;
18030; GFX12-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
18031; GFX12-CU:       ; %bb.0: ; %entry
18032; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
18033; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18034; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18035; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18036; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
18037; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
18038; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
18039; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18040; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
18041; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
18042; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
18043; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18044; GFX12-CU-NEXT:    s_wait_storecnt 0x0
18045; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_DEV
18046; GFX12-CU-NEXT:    s_wait_storecnt 0x0
18047; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
18048; GFX12-CU-NEXT:    s_endpgm
18049    ptr addrspace(1) %out, i32 %in, i32 %old) {
18050entry:
18051  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
18052  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
18053  ret void
18054}
18055
; Returning cmpxchg at "agent-one-as" scope with monotonic success and failure
; orderings on a global (addrspace(1)) pointer; the value read by the atomic
; is stored back through %out.
;
; For every target the checks below expect the returning form of the atomic
; (glc, sc0 or TH_ATOMIC_RETURN), a single wait between the atomic and the
; store that consumes its result, and no cache invalidate or write-back
; instructions.
;
; The check lines are autogenerated by utils/update_llc_test_checks.py
; (see the note at the top of this file); regenerate them with that script
; instead of editing them by hand.
18056define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg(
18057; GFX6-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18058; GFX6:       ; %bb.0: ; %entry
18059; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
18060; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18061; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
18062; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
18063; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
18064; GFX6-NEXT:    s_mov_b32 s12, s5
18065; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
18066; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
18067; GFX6-NEXT:    s_mov_b32 s11, -1
18068; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
18069; GFX6-NEXT:    s_mov_b32 s5, s12
18070; GFX6-NEXT:    s_mov_b32 s6, s11
18071; GFX6-NEXT:    s_mov_b32 s7, s10
18072; GFX6-NEXT:    v_mov_b32_e32 v0, s9
18073; GFX6-NEXT:    v_mov_b32_e32 v2, s8
18074; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18075; GFX6-NEXT:    v_mov_b32_e32 v1, v2
18076; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18077; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18078; GFX6-NEXT:    s_waitcnt vmcnt(0)
18079; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
18080; GFX6-NEXT:    s_endpgm
18081;
18082; GFX7-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18083; GFX7:       ; %bb.0: ; %entry
18084; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
18085; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18086; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
18087; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
18088; GFX7-NEXT:    s_mov_b64 s[12:13], 16
18089; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
18090; GFX7-NEXT:    s_mov_b32 s6, s4
18091; GFX7-NEXT:    s_mov_b32 s7, s5
18092; GFX7-NEXT:    s_mov_b32 s11, s12
18093; GFX7-NEXT:    s_mov_b32 s10, s13
18094; GFX7-NEXT:    s_add_u32 s6, s6, s11
18095; GFX7-NEXT:    s_addc_u32 s10, s7, s10
18096; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
18097; GFX7-NEXT:    s_mov_b32 s7, s10
18098; GFX7-NEXT:    v_mov_b32_e32 v2, s9
18099; GFX7-NEXT:    v_mov_b32_e32 v0, s8
18100; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18101; GFX7-NEXT:    v_mov_b32_e32 v3, v0
18102; GFX7-NEXT:    v_mov_b32_e32 v0, s6
18103; GFX7-NEXT:    v_mov_b32_e32 v1, s7
18104; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18105; GFX7-NEXT:    v_mov_b32_e32 v0, s4
18106; GFX7-NEXT:    v_mov_b32_e32 v1, s5
18107; GFX7-NEXT:    s_waitcnt vmcnt(0)
18108; GFX7-NEXT:    flat_store_dword v[0:1], v2
18109; GFX7-NEXT:    s_endpgm
18110;
18111; GFX10-WGP-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18112; GFX10-WGP:       ; %bb.0: ; %entry
18113; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
18114; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18115; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
18116; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
18117; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18118; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
18119; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
18120; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18121; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
18122; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18123; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18124; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
18125; GFX10-WGP-NEXT:    s_endpgm
18126;
18127; GFX10-CU-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18128; GFX10-CU:       ; %bb.0: ; %entry
18129; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
18130; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18131; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
18132; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
18133; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
18134; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
18135; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
18136; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18137; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
18138; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18139; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18140; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
18141; GFX10-CU-NEXT:    s_endpgm
18142;
18143; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18144; SKIP-CACHE-INV:       ; %bb.0: ; %entry
18145; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
18146; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
18147; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
18148; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
18149; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
18150; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
18151; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
18152; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
18153; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
18154; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
18155; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
18156; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
18157; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
18158; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
18159; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
18160; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18161; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
18162; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18163; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18164; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18165; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
18166; SKIP-CACHE-INV-NEXT:    s_endpgm
18167;
18168; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18169; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
18170; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18171; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18172; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18173; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18174; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18175; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18176; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18177; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18178; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18179; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18180; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18181; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18182; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
18183;
18184; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18185; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
18186; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18187; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18188; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18189; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18190; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18191; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18192; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18193; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18194; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18195; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18196; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18197; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18198; GFX90A-TGSPLIT-NEXT:    s_endpgm
18199;
18200; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18201; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
18202; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18203; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18204; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18205; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18206; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18207; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18208; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18209; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18210; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18211; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18212; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18213; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18214; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
18215;
18216; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18217; GFX940-TGSPLIT:       ; %bb.0: ; %entry
18218; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18219; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18220; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18221; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18222; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18223; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18224; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18225; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18226; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18227; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18228; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18229; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18230; GFX940-TGSPLIT-NEXT:    s_endpgm
18231;
18232; GFX11-WGP-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18233; GFX11-WGP:       ; %bb.0: ; %entry
18234; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
18235; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18236; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18237; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18238; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18239; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
18240; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
18241; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18242; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
18243; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18244; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
18245; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18246; GFX11-WGP-NEXT:    s_endpgm
18247;
18248; GFX11-CU-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18249; GFX11-CU:       ; %bb.0: ; %entry
18250; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
18251; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18252; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18253; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18254; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
18255; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
18256; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
18257; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18258; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
18259; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18260; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18261; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18262; GFX11-CU-NEXT:    s_endpgm
18263;
18264; GFX12-WGP-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18265; GFX12-WGP:       ; %bb.0: ; %entry
18266; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
18267; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18268; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18269; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18270; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
18271; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
18272; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
18273; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18274; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
18275; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18276; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18277; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18278; GFX12-WGP-NEXT:    s_endpgm
18279;
18280; GFX12-CU-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
18281; GFX12-CU:       ; %bb.0: ; %entry
18282; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
18283; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18284; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18285; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18286; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
18287; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
18288; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
18289; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18290; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
18291; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18292; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18293; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18294; GFX12-CU-NEXT:    s_endpgm
18295    ptr addrspace(1) %out, i32 %in, i32 %old) {
18296entry:
  ; Address of the i32 four elements past %out, i.e. 16 bytes; this is the
  ; 16-byte offset seen on the expected atomic instructions (GFX7 instead
  ; adds 16 to the pointer with scalar instructions).
18297  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; %old is the value compared against memory, %in is the replacement value.
18298  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic
  ; Element 0 of the cmpxchg result is the value that was read from memory.
18299  %val0 = extractvalue { i32, i1 } %val, 0
  ; Storing the read value through %out makes the atomic's result used, which
  ; is what the returning instruction forms in the checks correspond to.
18300  store i32 %val0, ptr addrspace(1) %out, align 4
18301  ret void
18302}
18303
18304define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg(
18305; GFX6-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18306; GFX6:       ; %bb.0: ; %entry
18307; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
18308; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18309; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
18310; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
18311; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
18312; GFX6-NEXT:    s_mov_b32 s12, s5
18313; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
18314; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
18315; GFX6-NEXT:    s_mov_b32 s11, -1
18316; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
18317; GFX6-NEXT:    s_mov_b32 s5, s12
18318; GFX6-NEXT:    s_mov_b32 s6, s11
18319; GFX6-NEXT:    s_mov_b32 s7, s10
18320; GFX6-NEXT:    v_mov_b32_e32 v0, s9
18321; GFX6-NEXT:    v_mov_b32_e32 v2, s8
18322; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18323; GFX6-NEXT:    v_mov_b32_e32 v1, v2
18324; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18325; GFX6-NEXT:    s_waitcnt vmcnt(0)
18326; GFX6-NEXT:    buffer_wbinvl1
18327; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18328; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
18329; GFX6-NEXT:    s_endpgm
18330;
18331; GFX7-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18332; GFX7:       ; %bb.0: ; %entry
18333; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
18334; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18335; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
18336; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
18337; GFX7-NEXT:    s_mov_b64 s[12:13], 16
18338; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
18339; GFX7-NEXT:    s_mov_b32 s6, s4
18340; GFX7-NEXT:    s_mov_b32 s7, s5
18341; GFX7-NEXT:    s_mov_b32 s11, s12
18342; GFX7-NEXT:    s_mov_b32 s10, s13
18343; GFX7-NEXT:    s_add_u32 s6, s6, s11
18344; GFX7-NEXT:    s_addc_u32 s10, s7, s10
18345; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
18346; GFX7-NEXT:    s_mov_b32 s7, s10
18347; GFX7-NEXT:    v_mov_b32_e32 v2, s9
18348; GFX7-NEXT:    v_mov_b32_e32 v0, s8
18349; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18350; GFX7-NEXT:    v_mov_b32_e32 v3, v0
18351; GFX7-NEXT:    v_mov_b32_e32 v0, s6
18352; GFX7-NEXT:    v_mov_b32_e32 v1, s7
18353; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18354; GFX7-NEXT:    s_waitcnt vmcnt(0)
18355; GFX7-NEXT:    buffer_wbinvl1_vol
18356; GFX7-NEXT:    v_mov_b32_e32 v0, s4
18357; GFX7-NEXT:    v_mov_b32_e32 v1, s5
18358; GFX7-NEXT:    flat_store_dword v[0:1], v2
18359; GFX7-NEXT:    s_endpgm
18360;
18361; GFX10-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18362; GFX10-WGP:       ; %bb.0: ; %entry
18363; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
18364; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18365; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
18366; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
18367; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18368; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
18369; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
18370; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18371; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
18372; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18373; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18374; GFX10-WGP-NEXT:    buffer_gl1_inv
18375; GFX10-WGP-NEXT:    buffer_gl0_inv
18376; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
18377; GFX10-WGP-NEXT:    s_endpgm
18378;
18379; GFX10-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18380; GFX10-CU:       ; %bb.0: ; %entry
18381; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
18382; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18383; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
18384; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
18385; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
18386; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
18387; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
18388; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18389; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
18390; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18391; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18392; GFX10-CU-NEXT:    buffer_gl1_inv
18393; GFX10-CU-NEXT:    buffer_gl0_inv
18394; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
18395; GFX10-CU-NEXT:    s_endpgm
18396;
18397; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18398; SKIP-CACHE-INV:       ; %bb.0: ; %entry
18399; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
18400; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
18401; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
18402; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
18403; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
18404; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
18405; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
18406; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
18407; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
18408; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
18409; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
18410; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
18411; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
18412; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
18413; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
18414; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18415; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
18416; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18417; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18418; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18419; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18420; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
18421; SKIP-CACHE-INV-NEXT:    s_endpgm
18422;
18423; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18424; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
18425; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18426; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18427; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18428; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18429; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18430; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18431; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18432; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18433; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18434; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18435; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18436; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
18437; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18438; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
18439;
18440; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18441; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
18442; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18443; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18444; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18445; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18446; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18447; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18448; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18449; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18450; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18451; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18452; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18453; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
18454; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18455; GFX90A-TGSPLIT-NEXT:    s_endpgm
18456;
18457; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18458; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
18459; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18460; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18461; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18462; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18463; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18464; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18465; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18466; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18467; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18468; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18469; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18470; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
18471; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18472; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
18473;
18474; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18475; GFX940-TGSPLIT:       ; %bb.0: ; %entry
18476; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18477; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18478; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18479; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18480; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18481; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18482; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18483; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18484; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18485; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18486; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18487; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
18488; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18489; GFX940-TGSPLIT-NEXT:    s_endpgm
18490;
18491; GFX11-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18492; GFX11-WGP:       ; %bb.0: ; %entry
18493; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
18494; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18495; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18496; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18497; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18498; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
18499; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
18500; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18501; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
18502; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18503; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
18504; GFX11-WGP-NEXT:    buffer_gl1_inv
18505; GFX11-WGP-NEXT:    buffer_gl0_inv
18506; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18507; GFX11-WGP-NEXT:    s_endpgm
18508;
18509; GFX11-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18510; GFX11-CU:       ; %bb.0: ; %entry
18511; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
18512; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18513; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18514; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18515; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
18516; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
18517; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
18518; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18519; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
18520; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18521; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18522; GFX11-CU-NEXT:    buffer_gl1_inv
18523; GFX11-CU-NEXT:    buffer_gl0_inv
18524; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18525; GFX11-CU-NEXT:    s_endpgm
18526;
18527; GFX12-WGP-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18528; GFX12-WGP:       ; %bb.0: ; %entry
18529; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
18530; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18531; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18532; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18533; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
18534; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
18535; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
18536; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18537; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
18538; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18539; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18540; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
18541; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18542; GFX12-WGP-NEXT:    s_endpgm
18543;
18544; GFX12-CU-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
18545; GFX12-CU:       ; %bb.0: ; %entry
18546; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
18547; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18548; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18549; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18550; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
18551; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
18552; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
18553; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18554; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
18555; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18556; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18557; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
18558; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18559; GFX12-CU-NEXT:    s_endpgm
18560    ptr addrspace(1) %out, i32 %in, i32 %old) {
18561entry:
18562  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
18563  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic
18564  %val0 = extractvalue { i32, i1 } %val, 0
18565  store i32 %val0, ptr addrspace(1) %out, align 4
18566  ret void
18567}
18568
; Exercises a volatile cmpxchg whose result is used, on global memory
; (addrspace(1)), at syncscope("agent-one-as") with success ordering acq_rel
; and failure ordering monotonic. The i32 member of the cmpxchg result is
; stored back to %out.
; The per-target expectations embedded in the signature below are
; autogenerated (see the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
18569define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
18570; GFX6-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18571; GFX6:       ; %bb.0: ; %entry
18572; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
18573; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18574; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
18575; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
18576; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
18577; GFX6-NEXT:    s_mov_b32 s12, s5
18578; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
18579; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
18580; GFX6-NEXT:    s_mov_b32 s11, -1
18581; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
18582; GFX6-NEXT:    s_mov_b32 s5, s12
18583; GFX6-NEXT:    s_mov_b32 s6, s11
18584; GFX6-NEXT:    s_mov_b32 s7, s10
18585; GFX6-NEXT:    v_mov_b32_e32 v0, s9
18586; GFX6-NEXT:    v_mov_b32_e32 v2, s8
18587; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18588; GFX6-NEXT:    v_mov_b32_e32 v1, v2
18589; GFX6-NEXT:    s_waitcnt vmcnt(0)
18590; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18591; GFX6-NEXT:    s_waitcnt vmcnt(0)
18592; GFX6-NEXT:    buffer_wbinvl1
18593; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18594; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
18595; GFX6-NEXT:    s_endpgm
18596;
18597; GFX7-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18598; GFX7:       ; %bb.0: ; %entry
18599; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
18600; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18601; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
18602; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
18603; GFX7-NEXT:    s_mov_b64 s[12:13], 16
18604; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
18605; GFX7-NEXT:    s_mov_b32 s6, s4
18606; GFX7-NEXT:    s_mov_b32 s7, s5
18607; GFX7-NEXT:    s_mov_b32 s11, s12
18608; GFX7-NEXT:    s_mov_b32 s10, s13
18609; GFX7-NEXT:    s_add_u32 s6, s6, s11
18610; GFX7-NEXT:    s_addc_u32 s10, s7, s10
18611; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
18612; GFX7-NEXT:    s_mov_b32 s7, s10
18613; GFX7-NEXT:    v_mov_b32_e32 v2, s9
18614; GFX7-NEXT:    v_mov_b32_e32 v0, s8
18615; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18616; GFX7-NEXT:    v_mov_b32_e32 v3, v0
18617; GFX7-NEXT:    v_mov_b32_e32 v0, s6
18618; GFX7-NEXT:    v_mov_b32_e32 v1, s7
18619; GFX7-NEXT:    s_waitcnt vmcnt(0)
18620; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18621; GFX7-NEXT:    s_waitcnt vmcnt(0)
18622; GFX7-NEXT:    buffer_wbinvl1_vol
18623; GFX7-NEXT:    v_mov_b32_e32 v0, s4
18624; GFX7-NEXT:    v_mov_b32_e32 v1, s5
18625; GFX7-NEXT:    flat_store_dword v[0:1], v2
18626; GFX7-NEXT:    s_endpgm
18627;
18628; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18629; GFX10-WGP:       ; %bb.0: ; %entry
18630; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
18631; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18632; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
18633; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
18634; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18635; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
18636; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
18637; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18638; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
18639; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18640; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
18641; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18642; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18643; GFX10-WGP-NEXT:    buffer_gl1_inv
18644; GFX10-WGP-NEXT:    buffer_gl0_inv
18645; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
18646; GFX10-WGP-NEXT:    s_endpgm
18647;
18648; GFX10-CU-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18649; GFX10-CU:       ; %bb.0: ; %entry
18650; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
18651; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18652; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
18653; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
18654; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
18655; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
18656; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
18657; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18658; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
18659; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18660; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18661; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18662; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18663; GFX10-CU-NEXT:    buffer_gl1_inv
18664; GFX10-CU-NEXT:    buffer_gl0_inv
18665; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
18666; GFX10-CU-NEXT:    s_endpgm
18667;
18668; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18669; SKIP-CACHE-INV:       ; %bb.0: ; %entry
18670; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
18671; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
18672; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
18673; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
18674; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
18675; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
18676; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
18677; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
18678; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
18679; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
18680; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
18681; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
18682; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
18683; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
18684; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
18685; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18686; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
18687; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18688; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18689; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18690; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18691; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18692; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
18693; SKIP-CACHE-INV-NEXT:    s_endpgm
18694;
18695; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18696; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
18697; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18698; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18699; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18700; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18701; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18702; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18703; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18704; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18705; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18706; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18707; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18708; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18709; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
18710; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18711; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
18712;
18713; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18714; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
18715; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18716; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18717; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18718; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18719; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18720; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18721; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18722; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18723; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18724; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18725; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18726; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18727; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
18728; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18729; GFX90A-TGSPLIT-NEXT:    s_endpgm
18730;
18731; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18732; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
18733; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18734; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18735; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18736; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18737; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18738; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18739; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18740; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18741; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18742; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
18743; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18744; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18745; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18746; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
18747; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18748; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
18749;
18750; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18751; GFX940-TGSPLIT:       ; %bb.0: ; %entry
18752; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18753; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18754; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18755; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18756; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18757; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18758; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18759; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18760; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18761; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
18762; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18763; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
18764; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18765; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
18766; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18767; GFX940-TGSPLIT-NEXT:    s_endpgm
18768;
18769; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18770; GFX11-WGP:       ; %bb.0: ; %entry
18771; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
18772; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18773; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18774; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18775; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18776; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
18777; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
18778; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18779; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
18780; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
18781; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
18782; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18783; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
18784; GFX11-WGP-NEXT:    buffer_gl1_inv
18785; GFX11-WGP-NEXT:    buffer_gl0_inv
18786; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18787; GFX11-WGP-NEXT:    s_endpgm
18788;
18789; GFX11-CU-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18790; GFX11-CU:       ; %bb.0: ; %entry
18791; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
18792; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18793; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18794; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18795; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
18796; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
18797; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
18798; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18799; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
18800; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18801; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18802; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18803; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18804; GFX11-CU-NEXT:    buffer_gl1_inv
18805; GFX11-CU-NEXT:    buffer_gl0_inv
18806; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18807; GFX11-CU-NEXT:    s_endpgm
18808;
18809; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18810; GFX12-WGP:       ; %bb.0: ; %entry
18811; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
18812; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18813; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18814; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18815; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
18816; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
18817; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
18818; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18819; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
18820; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
18821; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
18822; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18823; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
18824; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18825; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
18826; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
18827; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18828; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
18829; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18830; GFX12-WGP-NEXT:    s_endpgm
18831;
18832; GFX12-CU-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
18833; GFX12-CU:       ; %bb.0: ; %entry
18834; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
18835; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18836; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18837; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18838; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
18839; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
18840; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
18841; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18842; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
18843; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
18844; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
18845; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18846; GFX12-CU-NEXT:    s_wait_storecnt 0x0
18847; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
18848; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
18849; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
18850; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18851; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
18852; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18853; GFX12-CU-NEXT:    s_endpgm
18854    ptr addrspace(1) %out, i32 %in, i32 %old) {
18855entry:
  ; Address of element index 4 past %out in i32 units, i.e. byte offset 16;
  ; this is the 16 that appears in the expected atomic instructions above.
18856  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Operation under test. Operands are (pointer, expected value %old,
  ; replacement value %in), followed by scope and the two orderings.
18857  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic
  ; Keep only member 0 (the i32) of the { i32, i1 } result; the i1 member is
  ; not used by this kernel.
18858  %val0 = extractvalue { i32, i1 } %val, 0
  ; Plain (non-atomic) store of that i32 to the start of %out.
18859  store i32 %val0, ptr addrspace(1) %out, align 4
18860  ret void
18861}
18862
18863define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
18864; GFX6-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
18865; GFX6:       ; %bb.0: ; %entry
18866; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
18867; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18868; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
18869; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
18870; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
18871; GFX6-NEXT:    s_mov_b32 s12, s5
18872; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
18873; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
18874; GFX6-NEXT:    s_mov_b32 s11, -1
18875; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
18876; GFX6-NEXT:    s_mov_b32 s5, s12
18877; GFX6-NEXT:    s_mov_b32 s6, s11
18878; GFX6-NEXT:    s_mov_b32 s7, s10
18879; GFX6-NEXT:    v_mov_b32_e32 v0, s9
18880; GFX6-NEXT:    v_mov_b32_e32 v2, s8
18881; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18882; GFX6-NEXT:    v_mov_b32_e32 v1, v2
18883; GFX6-NEXT:    s_waitcnt vmcnt(0)
18884; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18885; GFX6-NEXT:    s_waitcnt vmcnt(0)
18886; GFX6-NEXT:    buffer_wbinvl1
18887; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18888; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
18889; GFX6-NEXT:    s_endpgm
18890;
18891; GFX7-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
18892; GFX7:       ; %bb.0: ; %entry
18893; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
18894; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18895; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
18896; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
18897; GFX7-NEXT:    s_mov_b64 s[12:13], 16
18898; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
18899; GFX7-NEXT:    s_mov_b32 s6, s4
18900; GFX7-NEXT:    s_mov_b32 s7, s5
18901; GFX7-NEXT:    s_mov_b32 s11, s12
18902; GFX7-NEXT:    s_mov_b32 s10, s13
18903; GFX7-NEXT:    s_add_u32 s6, s6, s11
18904; GFX7-NEXT:    s_addc_u32 s10, s7, s10
18905; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
18906; GFX7-NEXT:    s_mov_b32 s7, s10
18907; GFX7-NEXT:    v_mov_b32_e32 v2, s9
18908; GFX7-NEXT:    v_mov_b32_e32 v0, s8
18909; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18910; GFX7-NEXT:    v_mov_b32_e32 v3, v0
18911; GFX7-NEXT:    v_mov_b32_e32 v0, s6
18912; GFX7-NEXT:    v_mov_b32_e32 v1, s7
18913; GFX7-NEXT:    s_waitcnt vmcnt(0)
18914; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18915; GFX7-NEXT:    s_waitcnt vmcnt(0)
18916; GFX7-NEXT:    buffer_wbinvl1_vol
18917; GFX7-NEXT:    v_mov_b32_e32 v0, s4
18918; GFX7-NEXT:    v_mov_b32_e32 v1, s5
18919; GFX7-NEXT:    flat_store_dword v[0:1], v2
18920; GFX7-NEXT:    s_endpgm
18921;
18922; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
18923; GFX10-WGP:       ; %bb.0: ; %entry
18924; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
18925; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18926; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
18927; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
18928; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18929; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
18930; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
18931; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18932; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
18933; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18934; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
18935; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18936; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18937; GFX10-WGP-NEXT:    buffer_gl1_inv
18938; GFX10-WGP-NEXT:    buffer_gl0_inv
18939; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
18940; GFX10-WGP-NEXT:    s_endpgm
18941;
18942; GFX10-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
18943; GFX10-CU:       ; %bb.0: ; %entry
18944; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
18945; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18946; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
18947; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
18948; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
18949; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
18950; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
18951; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18952; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
18953; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18954; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18955; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18956; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18957; GFX10-CU-NEXT:    buffer_gl1_inv
18958; GFX10-CU-NEXT:    buffer_gl0_inv
18959; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
18960; GFX10-CU-NEXT:    s_endpgm
18961;
18962; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
18963; SKIP-CACHE-INV:       ; %bb.0: ; %entry
18964; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
18965; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
18966; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
18967; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
18968; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
18969; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
18970; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
18971; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
18972; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
18973; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
18974; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
18975; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
18976; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
18977; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
18978; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
18979; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18980; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
18981; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18982; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18983; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18984; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18985; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18986; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
18987; SKIP-CACHE-INV-NEXT:    s_endpgm
18988;
18989; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
18990; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
18991; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18992; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18993; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18994; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18995; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18996; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18997; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18998; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18999; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19000; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19001; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19002; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19003; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
19004; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19005; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
19006;
19007; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
19008; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
19009; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19010; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19011; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19012; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19013; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19014; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19015; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19016; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19017; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19018; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19019; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19020; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19021; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
19022; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19023; GFX90A-TGSPLIT-NEXT:    s_endpgm
19024;
19025; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
19026; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
19027; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19028; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19029; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19030; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19031; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19032; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19033; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19034; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19035; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19036; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
19037; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19038; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19039; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19040; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
19041; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19042; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
19043;
19044; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
19045; GFX940-TGSPLIT:       ; %bb.0: ; %entry
19046; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19047; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19048; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19049; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19050; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19051; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19052; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19053; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19054; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19055; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
19056; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19057; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19058; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19059; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
19060; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19061; GFX940-TGSPLIT-NEXT:    s_endpgm
19062;
19063; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
19064; GFX11-WGP:       ; %bb.0: ; %entry
19065; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
19066; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19067; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19068; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19069; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19070; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
19071; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
19072; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19073; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
19074; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19075; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19076; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19077; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19078; GFX11-WGP-NEXT:    buffer_gl1_inv
19079; GFX11-WGP-NEXT:    buffer_gl0_inv
19080; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19081; GFX11-WGP-NEXT:    s_endpgm
19082;
19083; GFX11-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
19084; GFX11-CU:       ; %bb.0: ; %entry
19085; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
19086; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19087; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19088; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19089; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
19090; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
19091; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
19092; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19093; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
19094; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19095; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19096; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19097; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19098; GFX11-CU-NEXT:    buffer_gl1_inv
19099; GFX11-CU-NEXT:    buffer_gl0_inv
19100; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19101; GFX11-CU-NEXT:    s_endpgm
19102;
19103; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
19104; GFX12-WGP:       ; %bb.0: ; %entry
19105; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
19106; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19107; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19108; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19109; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
19110; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
19111; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
19112; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19113; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
19114; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19115; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19116; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19117; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
19118; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19119; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19120; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19121; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19122; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
19123; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19124; GFX12-WGP-NEXT:    s_endpgm
19125;
19126; GFX12-CU-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
19127; GFX12-CU:       ; %bb.0: ; %entry
19128; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
19129; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19130; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19131; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19132; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
19133; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
19134; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
19135; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19136; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
19137; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19138; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19139; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19140; GFX12-CU-NEXT:    s_wait_storecnt 0x0
19141; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19142; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19143; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19144; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19145; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
19146; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19147; GFX12-CU-NEXT:    s_endpgm
19148    ptr addrspace(1) %out, i32 %in, i32 %old) {
19149entry:
19150  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
19151  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic
19152  %val0 = extractvalue { i32, i1 } %val, 0
19153  store i32 %val0, ptr addrspace(1) %out, align 4
19154  ret void
19155}
19156
; Returning cmpxchg on a global (addrspace 1) pointer at syncscope
; "agent-one-as" with success ordering monotonic and failure ordering acquire.
; The address is element 4 of %out, which appears as a 16-byte offset in the
; checks below; the value loaded by the cmpxchg is then stored to %out.
; Every check block expects a wait right after the atomic (s_waitcnt vmcnt(0),
; or the s_wait_bvhcnt/samplecnt/loadcnt triple on gfx12). All runs except the
; -amdgcn-skip-cache-invalidations one then expect a cache-invalidate
; instruction (buffer_wbinvl1*, buffer_gl*_inv, buffer_inv or global_inv)
; before the final store. No wait is expected in front of the atomic other
; than the one covering the kernel-argument loads.
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by
; hand.
19157define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg(
19158; GFX6-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19159; GFX6:       ; %bb.0: ; %entry
19160; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
19161; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19162; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
19163; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
19164; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
19165; GFX6-NEXT:    s_mov_b32 s12, s5
19166; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
19167; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
19168; GFX6-NEXT:    s_mov_b32 s11, -1
19169; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
19170; GFX6-NEXT:    s_mov_b32 s5, s12
19171; GFX6-NEXT:    s_mov_b32 s6, s11
19172; GFX6-NEXT:    s_mov_b32 s7, s10
19173; GFX6-NEXT:    v_mov_b32_e32 v0, s9
19174; GFX6-NEXT:    v_mov_b32_e32 v2, s8
19175; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19176; GFX6-NEXT:    v_mov_b32_e32 v1, v2
19177; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
19178; GFX6-NEXT:    s_waitcnt vmcnt(0)
19179; GFX6-NEXT:    buffer_wbinvl1
19180; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19181; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
19182; GFX6-NEXT:    s_endpgm
19183;
19184; GFX7-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19185; GFX7:       ; %bb.0: ; %entry
19186; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
19187; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19188; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
19189; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
19190; GFX7-NEXT:    s_mov_b64 s[12:13], 16
19191; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
19192; GFX7-NEXT:    s_mov_b32 s6, s4
19193; GFX7-NEXT:    s_mov_b32 s7, s5
19194; GFX7-NEXT:    s_mov_b32 s11, s12
19195; GFX7-NEXT:    s_mov_b32 s10, s13
19196; GFX7-NEXT:    s_add_u32 s6, s6, s11
19197; GFX7-NEXT:    s_addc_u32 s10, s7, s10
19198; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
19199; GFX7-NEXT:    s_mov_b32 s7, s10
19200; GFX7-NEXT:    v_mov_b32_e32 v2, s9
19201; GFX7-NEXT:    v_mov_b32_e32 v0, s8
19202; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19203; GFX7-NEXT:    v_mov_b32_e32 v3, v0
19204; GFX7-NEXT:    v_mov_b32_e32 v0, s6
19205; GFX7-NEXT:    v_mov_b32_e32 v1, s7
19206; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
19207; GFX7-NEXT:    s_waitcnt vmcnt(0)
19208; GFX7-NEXT:    buffer_wbinvl1_vol
19209; GFX7-NEXT:    v_mov_b32_e32 v0, s4
19210; GFX7-NEXT:    v_mov_b32_e32 v1, s5
19211; GFX7-NEXT:    flat_store_dword v[0:1], v2
19212; GFX7-NEXT:    s_endpgm
19213;
19214; GFX10-WGP-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19215; GFX10-WGP:       ; %bb.0: ; %entry
19216; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
19217; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19218; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
19219; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
19220; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19221; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
19222; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
19223; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19224; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
19225; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19226; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19227; GFX10-WGP-NEXT:    buffer_gl1_inv
19228; GFX10-WGP-NEXT:    buffer_gl0_inv
19229; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
19230; GFX10-WGP-NEXT:    s_endpgm
19231;
19232; GFX10-CU-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19233; GFX10-CU:       ; %bb.0: ; %entry
19234; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
19235; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19236; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
19237; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
19238; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
19239; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
19240; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
19241; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19242; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
19243; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19244; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19245; GFX10-CU-NEXT:    buffer_gl1_inv
19246; GFX10-CU-NEXT:    buffer_gl0_inv
19247; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
19248; GFX10-CU-NEXT:    s_endpgm
19249;
19250; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19251; SKIP-CACHE-INV:       ; %bb.0: ; %entry
19252; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
19253; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
19254; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
19255; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
19256; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
19257; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
19258; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
19259; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
19260; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
19261; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
19262; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
19263; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
19264; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
19265; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
19266; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
19267; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19268; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
19269; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
19270; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19271; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19272; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19273; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
19274; SKIP-CACHE-INV-NEXT:    s_endpgm
19275;
19276; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19277; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
19278; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19279; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19280; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19281; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19282; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19283; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19284; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19285; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19286; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19287; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19288; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19289; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
19290; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19291; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
19292;
19293; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19294; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
19295; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19296; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19297; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19298; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19299; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19300; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19301; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19302; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19303; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19304; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19305; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19306; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
19307; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19308; GFX90A-TGSPLIT-NEXT:    s_endpgm
19309;
19310; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19311; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
19312; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19313; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19314; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19315; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19316; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19317; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19318; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19319; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19320; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19321; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19322; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19323; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
19324; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19325; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
19326;
19327; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19328; GFX940-TGSPLIT:       ; %bb.0: ; %entry
19329; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19330; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19331; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19332; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19333; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19334; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19335; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19336; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19337; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19338; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19339; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19340; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
19341; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19342; GFX940-TGSPLIT-NEXT:    s_endpgm
19343;
19344; GFX11-WGP-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19345; GFX11-WGP:       ; %bb.0: ; %entry
19346; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
19347; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19348; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19349; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19350; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19351; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
19352; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
19353; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19354; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
19355; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19356; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19357; GFX11-WGP-NEXT:    buffer_gl1_inv
19358; GFX11-WGP-NEXT:    buffer_gl0_inv
19359; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19360; GFX11-WGP-NEXT:    s_endpgm
19361;
19362; GFX11-CU-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19363; GFX11-CU:       ; %bb.0: ; %entry
19364; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
19365; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19366; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19367; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19368; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
19369; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
19370; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
19371; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19372; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
19373; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19374; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19375; GFX11-CU-NEXT:    buffer_gl1_inv
19376; GFX11-CU-NEXT:    buffer_gl0_inv
19377; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19378; GFX11-CU-NEXT:    s_endpgm
19379;
19380; GFX12-WGP-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19381; GFX12-WGP:       ; %bb.0: ; %entry
19382; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
19383; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19384; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19385; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19386; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
19387; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
19388; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
19389; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19390; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
19391; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19392; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19393; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19394; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19395; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
19396; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19397; GFX12-WGP-NEXT:    s_endpgm
19398;
19399; GFX12-CU-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
19400; GFX12-CU:       ; %bb.0: ; %entry
19401; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
19402; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19403; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19404; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19405; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
19406; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
19407; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
19408; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19409; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
19410; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19411; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19412; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19413; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19414; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
19415; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19416; GFX12-CU-NEXT:    s_endpgm
19417    ptr addrspace(1) %out, i32 %in, i32 %old) {
19418entry:
19419  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
19420  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire
19421  %val0 = extractvalue { i32, i1 } %val, 0
19422  store i32 %val0, ptr addrspace(1) %out, align 4
19423  ret void
19424}
19425
; Returning cmpxchg on a global (addrspace 1) pointer at syncscope
; "agent-one-as" with both the success and the failure ordering set to acquire.
; The address is element 4 of %out, which appears as a 16-byte offset in the
; checks below; the value loaded by the cmpxchg is then stored to %out.
; Every check block expects a wait right after the atomic. All runs except the
; -amdgcn-skip-cache-invalidations one then expect a cache-invalidate
; instruction (buffer_wbinvl1*, buffer_gl*_inv, buffer_inv or global_inv)
; before the final store.
; The gfx12 blocks expect a single s_wait_loadcnt 0x0 between the atomic and
; global_inv, with no s_wait_bvhcnt or s_wait_samplecnt. This is presumably
; tied to the success ordering being acquire; confirm against the
; SIMemoryLegalizer wait-insertion logic before treating it as a regression.
; The check lines are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by
; hand.
19426define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
19427; GFX6-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19428; GFX6:       ; %bb.0: ; %entry
19429; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
19430; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19431; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
19432; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
19433; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
19434; GFX6-NEXT:    s_mov_b32 s12, s5
19435; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
19436; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
19437; GFX6-NEXT:    s_mov_b32 s11, -1
19438; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
19439; GFX6-NEXT:    s_mov_b32 s5, s12
19440; GFX6-NEXT:    s_mov_b32 s6, s11
19441; GFX6-NEXT:    s_mov_b32 s7, s10
19442; GFX6-NEXT:    v_mov_b32_e32 v0, s9
19443; GFX6-NEXT:    v_mov_b32_e32 v2, s8
19444; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19445; GFX6-NEXT:    v_mov_b32_e32 v1, v2
19446; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
19447; GFX6-NEXT:    s_waitcnt vmcnt(0)
19448; GFX6-NEXT:    buffer_wbinvl1
19449; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19450; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
19451; GFX6-NEXT:    s_endpgm
19452;
19453; GFX7-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19454; GFX7:       ; %bb.0: ; %entry
19455; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
19456; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19457; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
19458; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
19459; GFX7-NEXT:    s_mov_b64 s[12:13], 16
19460; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
19461; GFX7-NEXT:    s_mov_b32 s6, s4
19462; GFX7-NEXT:    s_mov_b32 s7, s5
19463; GFX7-NEXT:    s_mov_b32 s11, s12
19464; GFX7-NEXT:    s_mov_b32 s10, s13
19465; GFX7-NEXT:    s_add_u32 s6, s6, s11
19466; GFX7-NEXT:    s_addc_u32 s10, s7, s10
19467; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
19468; GFX7-NEXT:    s_mov_b32 s7, s10
19469; GFX7-NEXT:    v_mov_b32_e32 v2, s9
19470; GFX7-NEXT:    v_mov_b32_e32 v0, s8
19471; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19472; GFX7-NEXT:    v_mov_b32_e32 v3, v0
19473; GFX7-NEXT:    v_mov_b32_e32 v0, s6
19474; GFX7-NEXT:    v_mov_b32_e32 v1, s7
19475; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
19476; GFX7-NEXT:    s_waitcnt vmcnt(0)
19477; GFX7-NEXT:    buffer_wbinvl1_vol
19478; GFX7-NEXT:    v_mov_b32_e32 v0, s4
19479; GFX7-NEXT:    v_mov_b32_e32 v1, s5
19480; GFX7-NEXT:    flat_store_dword v[0:1], v2
19481; GFX7-NEXT:    s_endpgm
19482;
19483; GFX10-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19484; GFX10-WGP:       ; %bb.0: ; %entry
19485; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
19486; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19487; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
19488; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
19489; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19490; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
19491; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
19492; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19493; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
19494; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19495; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19496; GFX10-WGP-NEXT:    buffer_gl1_inv
19497; GFX10-WGP-NEXT:    buffer_gl0_inv
19498; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
19499; GFX10-WGP-NEXT:    s_endpgm
19500;
19501; GFX10-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19502; GFX10-CU:       ; %bb.0: ; %entry
19503; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
19504; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19505; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
19506; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
19507; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
19508; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
19509; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
19510; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19511; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
19512; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19513; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19514; GFX10-CU-NEXT:    buffer_gl1_inv
19515; GFX10-CU-NEXT:    buffer_gl0_inv
19516; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
19517; GFX10-CU-NEXT:    s_endpgm
19518;
19519; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19520; SKIP-CACHE-INV:       ; %bb.0: ; %entry
19521; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
19522; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
19523; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
19524; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
19525; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
19526; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
19527; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
19528; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
19529; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
19530; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
19531; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
19532; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
19533; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
19534; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
19535; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
19536; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19537; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
19538; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
19539; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19540; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19541; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19542; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
19543; SKIP-CACHE-INV-NEXT:    s_endpgm
19544;
19545; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19546; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
19547; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19548; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19549; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19550; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19551; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19552; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19553; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19554; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19555; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19556; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19557; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19558; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
19559; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19560; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
19561;
19562; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19563; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
19564; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19565; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19566; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19567; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19568; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19569; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19570; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19571; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19572; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19573; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19574; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19575; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
19576; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19577; GFX90A-TGSPLIT-NEXT:    s_endpgm
19578;
19579; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19580; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
19581; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19582; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19583; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19584; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19585; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19586; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19587; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19588; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19589; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19590; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19591; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19592; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
19593; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19594; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
19595;
19596; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19597; GFX940-TGSPLIT:       ; %bb.0: ; %entry
19598; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19599; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19600; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19601; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19602; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19603; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19604; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19605; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19606; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19607; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19608; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19609; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
19610; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19611; GFX940-TGSPLIT-NEXT:    s_endpgm
19612;
19613; GFX11-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19614; GFX11-WGP:       ; %bb.0: ; %entry
19615; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
19616; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19617; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19618; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19619; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19620; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
19621; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
19622; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19623; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
19624; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19625; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19626; GFX11-WGP-NEXT:    buffer_gl1_inv
19627; GFX11-WGP-NEXT:    buffer_gl0_inv
19628; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19629; GFX11-WGP-NEXT:    s_endpgm
19630;
19631; GFX11-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19632; GFX11-CU:       ; %bb.0: ; %entry
19633; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
19634; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19635; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19636; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19637; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
19638; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
19639; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
19640; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19641; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
19642; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19643; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19644; GFX11-CU-NEXT:    buffer_gl1_inv
19645; GFX11-CU-NEXT:    buffer_gl0_inv
19646; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19647; GFX11-CU-NEXT:    s_endpgm
19648;
19649; GFX12-WGP-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19650; GFX12-WGP:       ; %bb.0: ; %entry
19651; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
19652; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19653; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19654; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19655; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
19656; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
19657; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
19658; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19659; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
19660; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19661; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19662; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
19663; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19664; GFX12-WGP-NEXT:    s_endpgm
19665;
19666; GFX12-CU-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
19667; GFX12-CU:       ; %bb.0: ; %entry
19668; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
19669; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19670; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19671; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19672; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
19673; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
19674; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
19675; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19676; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
19677; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19678; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19679; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
19680; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19681; GFX12-CU-NEXT:    s_endpgm
19682    ptr addrspace(1) %out, i32 %in, i32 %old) {
19683entry:
19684  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
19685  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire
19686  %val0 = extractvalue { i32, i1 } %val, 0
19687  store i32 %val0, ptr addrspace(1) %out, align 4
19688  ret void
19689}
19690
; Kernel under test: a volatile cmpxchg on global memory (addrspace 1) at
; syncscope("agent-one-as") with success ordering `release` and failure
; ordering `acquire`, whose loaded value is consumed afterwards.
;   %out - base pointer; the cmpxchg targets i32 element 4 of it (the 16-byte
;          offset visible in the expected output) and the loaded value is
;          stored back through %out itself.
;   %in  - replacement value operand of the cmpxchg.
;   %old - comparison value operand of the cmpxchg.
; The per-target expectation lines between the signature and the body are
; autogenerated by utils/update_llc_test_checks.py; regenerate them rather than
; editing them by hand.
19691define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
19692; GFX6-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19693; GFX6:       ; %bb.0: ; %entry
19694; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
19695; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19696; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
19697; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
19698; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
19699; GFX6-NEXT:    s_mov_b32 s12, s5
19700; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
19701; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
19702; GFX6-NEXT:    s_mov_b32 s11, -1
19703; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
19704; GFX6-NEXT:    s_mov_b32 s5, s12
19705; GFX6-NEXT:    s_mov_b32 s6, s11
19706; GFX6-NEXT:    s_mov_b32 s7, s10
19707; GFX6-NEXT:    v_mov_b32_e32 v0, s9
19708; GFX6-NEXT:    v_mov_b32_e32 v2, s8
19709; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19710; GFX6-NEXT:    v_mov_b32_e32 v1, v2
19711; GFX6-NEXT:    s_waitcnt vmcnt(0)
19712; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
19713; GFX6-NEXT:    s_waitcnt vmcnt(0)
19714; GFX6-NEXT:    buffer_wbinvl1
19715; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19716; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
19717; GFX6-NEXT:    s_endpgm
19718;
19719; GFX7-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19720; GFX7:       ; %bb.0: ; %entry
19721; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
19722; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19723; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
19724; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
19725; GFX7-NEXT:    s_mov_b64 s[12:13], 16
19726; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
19727; GFX7-NEXT:    s_mov_b32 s6, s4
19728; GFX7-NEXT:    s_mov_b32 s7, s5
19729; GFX7-NEXT:    s_mov_b32 s11, s12
19730; GFX7-NEXT:    s_mov_b32 s10, s13
19731; GFX7-NEXT:    s_add_u32 s6, s6, s11
19732; GFX7-NEXT:    s_addc_u32 s10, s7, s10
19733; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
19734; GFX7-NEXT:    s_mov_b32 s7, s10
19735; GFX7-NEXT:    v_mov_b32_e32 v2, s9
19736; GFX7-NEXT:    v_mov_b32_e32 v0, s8
19737; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19738; GFX7-NEXT:    v_mov_b32_e32 v3, v0
19739; GFX7-NEXT:    v_mov_b32_e32 v0, s6
19740; GFX7-NEXT:    v_mov_b32_e32 v1, s7
19741; GFX7-NEXT:    s_waitcnt vmcnt(0)
19742; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
19743; GFX7-NEXT:    s_waitcnt vmcnt(0)
19744; GFX7-NEXT:    buffer_wbinvl1_vol
19745; GFX7-NEXT:    v_mov_b32_e32 v0, s4
19746; GFX7-NEXT:    v_mov_b32_e32 v1, s5
19747; GFX7-NEXT:    flat_store_dword v[0:1], v2
19748; GFX7-NEXT:    s_endpgm
19749;
19750; GFX10-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19751; GFX10-WGP:       ; %bb.0: ; %entry
19752; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
19753; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19754; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
19755; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
19756; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19757; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
19758; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
19759; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19760; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
19761; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19762; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19763; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19764; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19765; GFX10-WGP-NEXT:    buffer_gl1_inv
19766; GFX10-WGP-NEXT:    buffer_gl0_inv
19767; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
19768; GFX10-WGP-NEXT:    s_endpgm
19769;
19770; GFX10-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19771; GFX10-CU:       ; %bb.0: ; %entry
19772; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
19773; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19774; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
19775; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
19776; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
19777; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
19778; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
19779; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19780; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
19781; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19782; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19783; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19784; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19785; GFX10-CU-NEXT:    buffer_gl1_inv
19786; GFX10-CU-NEXT:    buffer_gl0_inv
19787; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
19788; GFX10-CU-NEXT:    s_endpgm
19789;
19790; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19791; SKIP-CACHE-INV:       ; %bb.0: ; %entry
19792; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
19793; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
19794; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
19795; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
19796; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
19797; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
19798; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
19799; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
19800; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
19801; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
19802; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
19803; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
19804; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
19805; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
19806; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
19807; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19808; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
19809; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19810; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
19811; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19812; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19813; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19814; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
19815; SKIP-CACHE-INV-NEXT:    s_endpgm
19816;
19817; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19818; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
19819; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19820; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19821; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19822; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19823; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19824; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19825; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19826; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19827; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19828; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19829; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19830; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19831; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
19832; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19833; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
19834;
19835; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19836; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
19837; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19838; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19839; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19840; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19841; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19842; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19843; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19844; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19845; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19846; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19847; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19848; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19849; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
19850; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19851; GFX90A-TGSPLIT-NEXT:    s_endpgm
19852;
19853; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19854; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
19855; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19856; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19857; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19858; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19859; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19860; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19861; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19862; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19863; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19864; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
19865; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19866; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19867; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19868; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
19869; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19870; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
19871;
19872; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19873; GFX940-TGSPLIT:       ; %bb.0: ; %entry
19874; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19875; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19876; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19877; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19878; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19879; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19880; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19881; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19882; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19883; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
19884; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19885; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
19886; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19887; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
19888; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19889; GFX940-TGSPLIT-NEXT:    s_endpgm
19890;
19891; GFX11-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19892; GFX11-WGP:       ; %bb.0: ; %entry
19893; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
19894; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19895; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19896; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19897; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19898; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
19899; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
19900; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19901; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
19902; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19903; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19904; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19905; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19906; GFX11-WGP-NEXT:    buffer_gl1_inv
19907; GFX11-WGP-NEXT:    buffer_gl0_inv
19908; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19909; GFX11-WGP-NEXT:    s_endpgm
19910;
19911; GFX11-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19912; GFX11-CU:       ; %bb.0: ; %entry
19913; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
19914; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19915; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19916; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19917; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
19918; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
19919; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
19920; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19921; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
19922; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19923; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19924; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19925; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19926; GFX11-CU-NEXT:    buffer_gl1_inv
19927; GFX11-CU-NEXT:    buffer_gl0_inv
19928; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19929; GFX11-CU-NEXT:    s_endpgm
19930;
19931; GFX12-WGP-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19932; GFX12-WGP:       ; %bb.0: ; %entry
19933; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
19934; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19935; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19936; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19937; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
19938; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
19939; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
19940; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19941; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
19942; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19943; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19944; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19945; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
19946; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19947; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19948; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19949; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19950; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
19951; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19952; GFX12-WGP-NEXT:    s_endpgm
19953;
19954; GFX12-CU-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
19955; GFX12-CU:       ; %bb.0: ; %entry
19956; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
19957; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19958; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19959; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19960; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
19961; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
19962; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
19963; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19964; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
19965; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19966; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19967; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19968; GFX12-CU-NEXT:    s_wait_storecnt 0x0
19969; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
19970; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19971; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19972; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19973; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
19974; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19975; GFX12-CU-NEXT:    s_endpgm
19976    ptr addrspace(1) %out, i32 %in, i32 %old) {
19977entry:
; Address of i32 element 4 of %out, i.e. 16 bytes past the base pointer.
19978  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; The operation under test: release on success, acquire on failure.
19979  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire
; Field 0 of the { i32, i1 } result is written back through %out so that the
; returning form of the atomic is required.
19980  %val0 = extractvalue { i32, i1 } %val, 0
19981  store i32 %val0, ptr addrspace(1) %out, align 4
19982  ret void
19983}
19984
; Kernel under test: a volatile cmpxchg on global memory (addrspace 1) at
; syncscope("agent-one-as") with success ordering `acq_rel` and failure
; ordering `acquire`, whose loaded value is consumed afterwards.
;   %out - base pointer; the cmpxchg targets i32 element 4 of it (the 16-byte
;          offset visible in the expected output) and the loaded value is
;          stored back through %out itself.
;   %in  - replacement value operand of the cmpxchg.
;   %old - comparison value operand of the cmpxchg.
; The per-target expectation lines between the signature and the body are
; autogenerated by utils/update_llc_test_checks.py; regenerate them rather than
; editing them by hand.
19985define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
19986; GFX6-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
19987; GFX6:       ; %bb.0: ; %entry
19988; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
19989; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19990; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
19991; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
19992; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
19993; GFX6-NEXT:    s_mov_b32 s12, s5
19994; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
19995; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
19996; GFX6-NEXT:    s_mov_b32 s11, -1
19997; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
19998; GFX6-NEXT:    s_mov_b32 s5, s12
19999; GFX6-NEXT:    s_mov_b32 s6, s11
20000; GFX6-NEXT:    s_mov_b32 s7, s10
20001; GFX6-NEXT:    v_mov_b32_e32 v0, s9
20002; GFX6-NEXT:    v_mov_b32_e32 v2, s8
20003; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20004; GFX6-NEXT:    v_mov_b32_e32 v1, v2
20005; GFX6-NEXT:    s_waitcnt vmcnt(0)
20006; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
20007; GFX6-NEXT:    s_waitcnt vmcnt(0)
20008; GFX6-NEXT:    buffer_wbinvl1
20009; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20010; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
20011; GFX6-NEXT:    s_endpgm
20012;
20013; GFX7-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20014; GFX7:       ; %bb.0: ; %entry
20015; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
20016; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20017; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
20018; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
20019; GFX7-NEXT:    s_mov_b64 s[12:13], 16
20020; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20021; GFX7-NEXT:    s_mov_b32 s6, s4
20022; GFX7-NEXT:    s_mov_b32 s7, s5
20023; GFX7-NEXT:    s_mov_b32 s11, s12
20024; GFX7-NEXT:    s_mov_b32 s10, s13
20025; GFX7-NEXT:    s_add_u32 s6, s6, s11
20026; GFX7-NEXT:    s_addc_u32 s10, s7, s10
20027; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
20028; GFX7-NEXT:    s_mov_b32 s7, s10
20029; GFX7-NEXT:    v_mov_b32_e32 v2, s9
20030; GFX7-NEXT:    v_mov_b32_e32 v0, s8
20031; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20032; GFX7-NEXT:    v_mov_b32_e32 v3, v0
20033; GFX7-NEXT:    v_mov_b32_e32 v0, s6
20034; GFX7-NEXT:    v_mov_b32_e32 v1, s7
20035; GFX7-NEXT:    s_waitcnt vmcnt(0)
20036; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
20037; GFX7-NEXT:    s_waitcnt vmcnt(0)
20038; GFX7-NEXT:    buffer_wbinvl1_vol
20039; GFX7-NEXT:    v_mov_b32_e32 v0, s4
20040; GFX7-NEXT:    v_mov_b32_e32 v1, s5
20041; GFX7-NEXT:    flat_store_dword v[0:1], v2
20042; GFX7-NEXT:    s_endpgm
20043;
20044; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20045; GFX10-WGP:       ; %bb.0: ; %entry
20046; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
20047; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20048; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
20049; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
20050; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20051; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
20052; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
20053; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20054; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
20055; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20056; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20057; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20058; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20059; GFX10-WGP-NEXT:    buffer_gl1_inv
20060; GFX10-WGP-NEXT:    buffer_gl0_inv
20061; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
20062; GFX10-WGP-NEXT:    s_endpgm
20063;
20064; GFX10-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20065; GFX10-CU:       ; %bb.0: ; %entry
20066; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
20067; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20068; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
20069; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
20070; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
20071; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
20072; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
20073; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20074; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
20075; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20076; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20077; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20078; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20079; GFX10-CU-NEXT:    buffer_gl1_inv
20080; GFX10-CU-NEXT:    buffer_gl0_inv
20081; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
20082; GFX10-CU-NEXT:    s_endpgm
20083;
20084; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20085; SKIP-CACHE-INV:       ; %bb.0: ; %entry
20086; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
20087; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
20088; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
20089; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
20090; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
20091; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
20092; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
20093; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
20094; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
20095; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
20096; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
20097; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
20098; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
20099; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
20100; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
20101; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20102; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
20103; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20104; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
20105; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20106; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20107; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20108; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
20109; SKIP-CACHE-INV-NEXT:    s_endpgm
20110;
20111; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20112; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
20113; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20114; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20115; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20116; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20117; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20118; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20119; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20120; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20121; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20122; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20123; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20124; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20125; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
20126; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20127; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
20128;
20129; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20130; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
20131; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20132; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20133; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20134; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20135; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20136; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20137; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20138; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20139; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20140; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20141; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20142; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20143; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
20144; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20145; GFX90A-TGSPLIT-NEXT:    s_endpgm
20146;
20147; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20148; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
20149; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20150; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20151; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20152; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20153; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20154; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20155; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20156; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20157; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20158; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
20159; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20160; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
20161; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20162; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
20163; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20164; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
20165;
20166; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20167; GFX940-TGSPLIT:       ; %bb.0: ; %entry
20168; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20169; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20170; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20171; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20172; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20173; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20174; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20175; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20176; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20177; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
20178; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20179; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
20180; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20181; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
20182; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20183; GFX940-TGSPLIT-NEXT:    s_endpgm
20184;
20185; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20186; GFX11-WGP:       ; %bb.0: ; %entry
20187; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
20188; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20189; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20190; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20191; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20192; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
20193; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
20194; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20195; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
20196; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20197; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20198; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20199; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20200; GFX11-WGP-NEXT:    buffer_gl1_inv
20201; GFX11-WGP-NEXT:    buffer_gl0_inv
20202; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20203; GFX11-WGP-NEXT:    s_endpgm
20204;
20205; GFX11-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20206; GFX11-CU:       ; %bb.0: ; %entry
20207; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
20208; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20209; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20210; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20211; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
20212; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
20213; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
20214; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20215; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
20216; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20217; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20218; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20219; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20220; GFX11-CU-NEXT:    buffer_gl1_inv
20221; GFX11-CU-NEXT:    buffer_gl0_inv
20222; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20223; GFX11-CU-NEXT:    s_endpgm
20224;
20225; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20226; GFX12-WGP:       ; %bb.0: ; %entry
20227; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
20228; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20229; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20230; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20231; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
20232; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
20233; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
20234; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20235; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
20236; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20237; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20238; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20239; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
20240; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
20241; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20242; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20243; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20244; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
20245; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20246; GFX12-WGP-NEXT:    s_endpgm
20247;
20248; GFX12-CU-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
20249; GFX12-CU:       ; %bb.0: ; %entry
20250; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
20251; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20252; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20253; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20254; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
20255; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
20256; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
20257; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20258; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
20259; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20260; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20261; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20262; GFX12-CU-NEXT:    s_wait_storecnt 0x0
20263; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
20264; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20265; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20266; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20267; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
20268; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20269; GFX12-CU-NEXT:    s_endpgm
20270    ptr addrspace(1) %out, i32 %in, i32 %old) {
20271entry:
; Address of i32 element 4 of %out, i.e. 16 bytes past the base pointer.
20272  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; The operation under test: acq_rel on success, acquire on failure.
20273  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire
; Field 0 of the { i32, i1 } result is written back through %out so that the
; returning form of the atomic is required.
20274  %val0 = extractvalue { i32, i1 } %val, 0
20275  store i32 %val0, ptr addrspace(1) %out, align 4
20276  ret void
20277}
20278
; Test: value-returning cmpxchg on a global (addrspace(1)) pointer with
; syncscope("agent-one-as"), success ordering seq_cst and failure ordering
; acquire. The old value extracted from the cmpxchg result is stored to %out.
; The per-target assertion lines embedded in the signature are autogenerated
; by utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them instead of editing them by hand.
20279define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
20280; GFX6-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20281; GFX6:       ; %bb.0: ; %entry
20282; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
20283; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20284; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
20285; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
20286; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
20287; GFX6-NEXT:    s_mov_b32 s12, s5
20288; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
20289; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
20290; GFX6-NEXT:    s_mov_b32 s11, -1
20291; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
20292; GFX6-NEXT:    s_mov_b32 s5, s12
20293; GFX6-NEXT:    s_mov_b32 s6, s11
20294; GFX6-NEXT:    s_mov_b32 s7, s10
20295; GFX6-NEXT:    v_mov_b32_e32 v0, s9
20296; GFX6-NEXT:    v_mov_b32_e32 v2, s8
20297; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20298; GFX6-NEXT:    v_mov_b32_e32 v1, v2
20299; GFX6-NEXT:    s_waitcnt vmcnt(0)
20300; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
20301; GFX6-NEXT:    s_waitcnt vmcnt(0)
20302; GFX6-NEXT:    buffer_wbinvl1
20303; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20304; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
20305; GFX6-NEXT:    s_endpgm
20306;
20307; GFX7-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20308; GFX7:       ; %bb.0: ; %entry
20309; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
20310; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20311; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
20312; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
20313; GFX7-NEXT:    s_mov_b64 s[12:13], 16
20314; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20315; GFX7-NEXT:    s_mov_b32 s6, s4
20316; GFX7-NEXT:    s_mov_b32 s7, s5
20317; GFX7-NEXT:    s_mov_b32 s11, s12
20318; GFX7-NEXT:    s_mov_b32 s10, s13
20319; GFX7-NEXT:    s_add_u32 s6, s6, s11
20320; GFX7-NEXT:    s_addc_u32 s10, s7, s10
20321; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
20322; GFX7-NEXT:    s_mov_b32 s7, s10
20323; GFX7-NEXT:    v_mov_b32_e32 v2, s9
20324; GFX7-NEXT:    v_mov_b32_e32 v0, s8
20325; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20326; GFX7-NEXT:    v_mov_b32_e32 v3, v0
20327; GFX7-NEXT:    v_mov_b32_e32 v0, s6
20328; GFX7-NEXT:    v_mov_b32_e32 v1, s7
20329; GFX7-NEXT:    s_waitcnt vmcnt(0)
20330; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
20331; GFX7-NEXT:    s_waitcnt vmcnt(0)
20332; GFX7-NEXT:    buffer_wbinvl1_vol
20333; GFX7-NEXT:    v_mov_b32_e32 v0, s4
20334; GFX7-NEXT:    v_mov_b32_e32 v1, s5
20335; GFX7-NEXT:    flat_store_dword v[0:1], v2
20336; GFX7-NEXT:    s_endpgm
20337;
20338; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20339; GFX10-WGP:       ; %bb.0: ; %entry
20340; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
20341; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20342; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
20343; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
20344; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20345; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
20346; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
20347; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20348; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
20349; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20350; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20351; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20352; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20353; GFX10-WGP-NEXT:    buffer_gl1_inv
20354; GFX10-WGP-NEXT:    buffer_gl0_inv
20355; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
20356; GFX10-WGP-NEXT:    s_endpgm
20357;
20358; GFX10-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20359; GFX10-CU:       ; %bb.0: ; %entry
20360; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
20361; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20362; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
20363; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
20364; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
20365; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
20366; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
20367; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20368; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
20369; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20370; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20371; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20372; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20373; GFX10-CU-NEXT:    buffer_gl1_inv
20374; GFX10-CU-NEXT:    buffer_gl0_inv
20375; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
20376; GFX10-CU-NEXT:    s_endpgm
20377;
20378; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20379; SKIP-CACHE-INV:       ; %bb.0: ; %entry
20380; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
20381; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
20382; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
20383; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
20384; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
20385; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
20386; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
20387; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
20388; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
20389; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
20390; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
20391; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
20392; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
20393; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
20394; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
20395; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20396; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
20397; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20398; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
20399; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20400; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20401; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20402; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
20403; SKIP-CACHE-INV-NEXT:    s_endpgm
20404;
20405; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20406; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
20407; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20408; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20409; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20410; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20411; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20412; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20413; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20414; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20415; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20416; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20417; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20418; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20419; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
20420; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20421; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
20422;
20423; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20424; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
20425; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20426; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20427; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20428; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20429; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20430; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20431; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20432; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20433; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20434; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20435; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20436; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20437; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
20438; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20439; GFX90A-TGSPLIT-NEXT:    s_endpgm
20440;
20441; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20442; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
20443; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20444; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20445; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20446; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20447; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20448; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20449; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20450; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20451; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20452; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
20453; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20454; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
20455; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20456; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
20457; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20458; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
20459;
20460; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20461; GFX940-TGSPLIT:       ; %bb.0: ; %entry
20462; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20463; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20464; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20465; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20466; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20467; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20468; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20469; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20470; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20471; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
20472; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20473; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
20474; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20475; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
20476; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20477; GFX940-TGSPLIT-NEXT:    s_endpgm
20478;
20479; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20480; GFX11-WGP:       ; %bb.0: ; %entry
20481; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
20482; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20483; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20484; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20485; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20486; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
20487; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
20488; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20489; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
20490; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20491; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20492; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20493; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20494; GFX11-WGP-NEXT:    buffer_gl1_inv
20495; GFX11-WGP-NEXT:    buffer_gl0_inv
20496; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20497; GFX11-WGP-NEXT:    s_endpgm
20498;
20499; GFX11-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20500; GFX11-CU:       ; %bb.0: ; %entry
20501; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
20502; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20503; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20504; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20505; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
20506; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
20507; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
20508; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20509; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
20510; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20511; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20512; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20513; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20514; GFX11-CU-NEXT:    buffer_gl1_inv
20515; GFX11-CU-NEXT:    buffer_gl0_inv
20516; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20517; GFX11-CU-NEXT:    s_endpgm
20518;
20519; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20520; GFX12-WGP:       ; %bb.0: ; %entry
20521; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
20522; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20523; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20524; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20525; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
20526; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
20527; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
20528; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20529; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
20530; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20531; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20532; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20533; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
20534; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
20535; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20536; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20537; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20538; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
20539; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20540; GFX12-WGP-NEXT:    s_endpgm
20541;
20542; GFX12-CU-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
20543; GFX12-CU:       ; %bb.0: ; %entry
20544; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
20545; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20546; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20547; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20548; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
20549; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
20550; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
20551; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20552; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
20553; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20554; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20555; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20556; GFX12-CU-NEXT:    s_wait_storecnt 0x0
20557; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
20558; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20559; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20560; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20561; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
20562; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20563; GFX12-CU-NEXT:    s_endpgm
20564    ptr addrspace(1) %out, i32 %in, i32 %old) {
20565entry:
  ; Address of the fifth i32 past %out, i.e. the 16-byte displacement that the
  ; expected atomics above carry (as an offset of 16 or a materialised 16).
20566  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
20567  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire
  ; Field 0 of the cmpxchg result is the value that was in memory; the i1
  ; success flag (field 1) is not used by this test.
20568  %val0 = extractvalue { i32, i1 } %val, 0
20569  store i32 %val0, ptr addrspace(1) %out, align 4
20570  ret void
20571}
20572
; Test: value-returning cmpxchg on a global (addrspace(1)) pointer with
; syncscope("agent-one-as"), success ordering monotonic and failure ordering
; seq_cst. The old value extracted from the cmpxchg result is stored to %out.
; In the expectations below the atomic is still preceded by wait instructions
; and followed by a wait (plus a cache invalidate on the runs that do not skip
; invalidations), despite the monotonic success ordering.
; NOTE(review): presumably the stronger failure ordering drives this - confirm
; against the memory legalizer before relying on it.
; The per-target assertion lines embedded in the signature are autogenerated
; by utils/update_llc_test_checks.py (see the NOTE at the top of the file);
; regenerate them instead of editing them by hand.
20573define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
20574; GFX6-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20575; GFX6:       ; %bb.0: ; %entry
20576; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
20577; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20578; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
20579; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
20580; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
20581; GFX6-NEXT:    s_mov_b32 s12, s5
20582; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
20583; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
20584; GFX6-NEXT:    s_mov_b32 s11, -1
20585; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
20586; GFX6-NEXT:    s_mov_b32 s5, s12
20587; GFX6-NEXT:    s_mov_b32 s6, s11
20588; GFX6-NEXT:    s_mov_b32 s7, s10
20589; GFX6-NEXT:    v_mov_b32_e32 v0, s9
20590; GFX6-NEXT:    v_mov_b32_e32 v2, s8
20591; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20592; GFX6-NEXT:    v_mov_b32_e32 v1, v2
20593; GFX6-NEXT:    s_waitcnt vmcnt(0)
20594; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
20595; GFX6-NEXT:    s_waitcnt vmcnt(0)
20596; GFX6-NEXT:    buffer_wbinvl1
20597; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20598; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
20599; GFX6-NEXT:    s_endpgm
20600;
20601; GFX7-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20602; GFX7:       ; %bb.0: ; %entry
20603; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
20604; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20605; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
20606; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
20607; GFX7-NEXT:    s_mov_b64 s[12:13], 16
20608; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20609; GFX7-NEXT:    s_mov_b32 s6, s4
20610; GFX7-NEXT:    s_mov_b32 s7, s5
20611; GFX7-NEXT:    s_mov_b32 s11, s12
20612; GFX7-NEXT:    s_mov_b32 s10, s13
20613; GFX7-NEXT:    s_add_u32 s6, s6, s11
20614; GFX7-NEXT:    s_addc_u32 s10, s7, s10
20615; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
20616; GFX7-NEXT:    s_mov_b32 s7, s10
20617; GFX7-NEXT:    v_mov_b32_e32 v2, s9
20618; GFX7-NEXT:    v_mov_b32_e32 v0, s8
20619; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20620; GFX7-NEXT:    v_mov_b32_e32 v3, v0
20621; GFX7-NEXT:    v_mov_b32_e32 v0, s6
20622; GFX7-NEXT:    v_mov_b32_e32 v1, s7
20623; GFX7-NEXT:    s_waitcnt vmcnt(0)
20624; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
20625; GFX7-NEXT:    s_waitcnt vmcnt(0)
20626; GFX7-NEXT:    buffer_wbinvl1_vol
20627; GFX7-NEXT:    v_mov_b32_e32 v0, s4
20628; GFX7-NEXT:    v_mov_b32_e32 v1, s5
20629; GFX7-NEXT:    flat_store_dword v[0:1], v2
20630; GFX7-NEXT:    s_endpgm
20631;
20632; GFX10-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20633; GFX10-WGP:       ; %bb.0: ; %entry
20634; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
20635; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20636; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
20637; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
20638; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20639; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
20640; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
20641; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20642; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
20643; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20644; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20645; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20646; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20647; GFX10-WGP-NEXT:    buffer_gl1_inv
20648; GFX10-WGP-NEXT:    buffer_gl0_inv
20649; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
20650; GFX10-WGP-NEXT:    s_endpgm
20651;
20652; GFX10-CU-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20653; GFX10-CU:       ; %bb.0: ; %entry
20654; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
20655; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20656; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
20657; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
20658; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
20659; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
20660; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
20661; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20662; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
20663; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20664; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20665; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20666; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20667; GFX10-CU-NEXT:    buffer_gl1_inv
20668; GFX10-CU-NEXT:    buffer_gl0_inv
20669; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
20670; GFX10-CU-NEXT:    s_endpgm
20671;
20672; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20673; SKIP-CACHE-INV:       ; %bb.0: ; %entry
20674; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
20675; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
20676; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
20677; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
20678; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
20679; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
20680; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
20681; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
20682; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
20683; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
20684; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
20685; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
20686; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
20687; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
20688; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
20689; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20690; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
20691; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20692; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
20693; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20694; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20695; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20696; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
20697; SKIP-CACHE-INV-NEXT:    s_endpgm
20698;
20699; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20700; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
20701; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20702; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20703; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20704; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20705; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20706; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20707; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20708; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20709; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20710; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20711; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20712; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20713; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
20714; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20715; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
20716;
20717; GFX90A-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20718; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
20719; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20720; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20721; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20722; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20723; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20724; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20725; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20726; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20727; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20728; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20729; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20730; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20731; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
20732; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20733; GFX90A-TGSPLIT-NEXT:    s_endpgm
20734;
20735; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20736; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
20737; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20738; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20739; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20740; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20741; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20742; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20743; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20744; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20745; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20746; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
20747; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20748; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
20749; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20750; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
20751; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20752; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
20753;
20754; GFX940-TGSPLIT-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20755; GFX940-TGSPLIT:       ; %bb.0: ; %entry
20756; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20757; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20758; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20759; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20760; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20761; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20762; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20763; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20764; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20765; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
20766; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20767; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
20768; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20769; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
20770; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20771; GFX940-TGSPLIT-NEXT:    s_endpgm
20772;
20773; GFX11-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20774; GFX11-WGP:       ; %bb.0: ; %entry
20775; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
20776; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20777; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20778; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20779; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20780; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
20781; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
20782; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20783; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
20784; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20785; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20786; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20787; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20788; GFX11-WGP-NEXT:    buffer_gl1_inv
20789; GFX11-WGP-NEXT:    buffer_gl0_inv
20790; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20791; GFX11-WGP-NEXT:    s_endpgm
20792;
20793; GFX11-CU-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20794; GFX11-CU:       ; %bb.0: ; %entry
20795; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
20796; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20797; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20798; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20799; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
20800; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
20801; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
20802; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20803; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
20804; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20805; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20806; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20807; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20808; GFX11-CU-NEXT:    buffer_gl1_inv
20809; GFX11-CU-NEXT:    buffer_gl0_inv
20810; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20811; GFX11-CU-NEXT:    s_endpgm
20812;
20813; GFX12-WGP-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20814; GFX12-WGP:       ; %bb.0: ; %entry
20815; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
20816; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20817; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20818; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20819; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
20820; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
20821; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
20822; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20823; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
20824; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20825; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20826; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20827; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
20828; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
20829; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20830; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20831; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20832; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
20833; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20834; GFX12-WGP-NEXT:    s_endpgm
20835;
20836; GFX12-CU-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
20837; GFX12-CU:       ; %bb.0: ; %entry
20838; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
20839; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20840; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20841; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20842; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
20843; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
20844; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
20845; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20846; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
20847; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20848; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20849; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20850; GFX12-CU-NEXT:    s_wait_storecnt 0x0
20851; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
20852; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20853; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20854; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20855; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
20856; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20857; GFX12-CU-NEXT:    s_endpgm
20858    ptr addrspace(1) %out, i32 %in, i32 %old) {
20859entry:
  ; Address of the fifth i32 past %out, i.e. the 16-byte displacement that the
  ; expected atomics above carry (as an offset of 16 or a materialised 16).
20860  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
20861  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst
  ; Field 0 of the cmpxchg result is the value that was in memory; the i1
  ; success flag (field 1) is not used by this test.
20862  %val0 = extractvalue { i32, i1 } %val, 0
20863  store i32 %val0, ptr addrspace(1) %out, align 4
20864  ret void
20865}
20866
; Returning cmpxchg on global memory (addrspace(1)) at syncscope
; "agent-one-as", with success ordering acquire and failure ordering seq_cst.
; The compare-and-swap targets i32 element 4 of %out (the expected code below
; addresses it as a 16-byte offset), and the value it yields is stored back
; to %out.
; The per-target check lines embedded in the parameter list are autogenerated
; (see the NOTE on the first line of this file); regenerate them with the
; update script instead of editing them by hand.
20867define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg(
20868; GFX6-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
20869; GFX6:       ; %bb.0: ; %entry
20870; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
20871; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20872; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
20873; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
20874; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
20875; GFX6-NEXT:    s_mov_b32 s12, s5
20876; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
20877; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
20878; GFX6-NEXT:    s_mov_b32 s11, -1
20879; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
20880; GFX6-NEXT:    s_mov_b32 s5, s12
20881; GFX6-NEXT:    s_mov_b32 s6, s11
20882; GFX6-NEXT:    s_mov_b32 s7, s10
20883; GFX6-NEXT:    v_mov_b32_e32 v0, s9
20884; GFX6-NEXT:    v_mov_b32_e32 v2, s8
20885; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20886; GFX6-NEXT:    v_mov_b32_e32 v1, v2
20887; GFX6-NEXT:    s_waitcnt vmcnt(0)
20888; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
20889; GFX6-NEXT:    s_waitcnt vmcnt(0)
20890; GFX6-NEXT:    buffer_wbinvl1
20891; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20892; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
20893; GFX6-NEXT:    s_endpgm
20894;
20895; GFX7-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
20896; GFX7:       ; %bb.0: ; %entry
20897; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
20898; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20899; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
20900; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
20901; GFX7-NEXT:    s_mov_b64 s[12:13], 16
20902; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20903; GFX7-NEXT:    s_mov_b32 s6, s4
20904; GFX7-NEXT:    s_mov_b32 s7, s5
20905; GFX7-NEXT:    s_mov_b32 s11, s12
20906; GFX7-NEXT:    s_mov_b32 s10, s13
20907; GFX7-NEXT:    s_add_u32 s6, s6, s11
20908; GFX7-NEXT:    s_addc_u32 s10, s7, s10
20909; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
20910; GFX7-NEXT:    s_mov_b32 s7, s10
20911; GFX7-NEXT:    v_mov_b32_e32 v2, s9
20912; GFX7-NEXT:    v_mov_b32_e32 v0, s8
20913; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20914; GFX7-NEXT:    v_mov_b32_e32 v3, v0
20915; GFX7-NEXT:    v_mov_b32_e32 v0, s6
20916; GFX7-NEXT:    v_mov_b32_e32 v1, s7
20917; GFX7-NEXT:    s_waitcnt vmcnt(0)
20918; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
20919; GFX7-NEXT:    s_waitcnt vmcnt(0)
20920; GFX7-NEXT:    buffer_wbinvl1_vol
20921; GFX7-NEXT:    v_mov_b32_e32 v0, s4
20922; GFX7-NEXT:    v_mov_b32_e32 v1, s5
20923; GFX7-NEXT:    flat_store_dword v[0:1], v2
20924; GFX7-NEXT:    s_endpgm
20925;
20926; GFX10-WGP-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
20927; GFX10-WGP:       ; %bb.0: ; %entry
20928; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
20929; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20930; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
20931; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
20932; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20933; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
20934; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
20935; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20936; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
20937; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20938; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20939; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20940; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20941; GFX10-WGP-NEXT:    buffer_gl1_inv
20942; GFX10-WGP-NEXT:    buffer_gl0_inv
20943; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
20944; GFX10-WGP-NEXT:    s_endpgm
20945;
20946; GFX10-CU-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
20947; GFX10-CU:       ; %bb.0: ; %entry
20948; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
20949; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20950; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
20951; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
20952; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
20953; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
20954; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
20955; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20956; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
20957; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20958; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20959; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20960; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20961; GFX10-CU-NEXT:    buffer_gl1_inv
20962; GFX10-CU-NEXT:    buffer_gl0_inv
20963; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
20964; GFX10-CU-NEXT:    s_endpgm
20965;
20966; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
20967; SKIP-CACHE-INV:       ; %bb.0: ; %entry
20968; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
20969; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
20970; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
20971; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
20972; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
20973; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
20974; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
20975; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
20976; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
20977; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
20978; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
20979; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
20980; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
20981; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
20982; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
20983; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20984; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
20985; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20986; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
20987; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20988; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20989; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20990; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
20991; SKIP-CACHE-INV-NEXT:    s_endpgm
20992;
20993; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
20994; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
20995; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20996; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20997; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20998; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20999; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21000; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21001; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21002; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21003; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21004; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21005; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21006; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21007; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
21008; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21009; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
21010;
21011; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
21012; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
21013; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21014; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21015; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21016; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21017; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21018; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21019; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21020; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21021; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21022; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21023; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21024; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21025; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
21026; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21027; GFX90A-TGSPLIT-NEXT:    s_endpgm
21028;
21029; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
21030; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
21031; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21032; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21033; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21034; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21035; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21036; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21037; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21038; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21039; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21040; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
21041; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21042; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
21043; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21044; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
21045; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21046; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
21047;
21048; GFX940-TGSPLIT-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
21049; GFX940-TGSPLIT:       ; %bb.0: ; %entry
21050; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21051; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21052; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21053; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21054; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21055; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21056; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21057; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21058; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21059; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
21060; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21061; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
21062; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21063; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
21064; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21065; GFX940-TGSPLIT-NEXT:    s_endpgm
21066;
21067; GFX11-WGP-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
21068; GFX11-WGP:       ; %bb.0: ; %entry
21069; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
21070; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21071; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21072; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21073; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21074; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
21075; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
21076; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21077; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
21078; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21079; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21080; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21081; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21082; GFX11-WGP-NEXT:    buffer_gl1_inv
21083; GFX11-WGP-NEXT:    buffer_gl0_inv
21084; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21085; GFX11-WGP-NEXT:    s_endpgm
21086;
21087; GFX11-CU-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
21088; GFX11-CU:       ; %bb.0: ; %entry
21089; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
21090; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21091; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21092; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21093; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
21094; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
21095; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
21096; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21097; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
21098; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21099; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21100; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21101; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21102; GFX11-CU-NEXT:    buffer_gl1_inv
21103; GFX11-CU-NEXT:    buffer_gl0_inv
21104; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21105; GFX11-CU-NEXT:    s_endpgm
21106;
21107; GFX12-WGP-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
21108; GFX12-WGP:       ; %bb.0: ; %entry
21109; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
21110; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21111; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21112; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21113; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
21114; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
21115; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
21116; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21117; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
21118; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
21119; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
21120; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21121; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
21122; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21123; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21124; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
21125; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21126; GFX12-WGP-NEXT:    s_endpgm
21127;
21128; GFX12-CU-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
21129; GFX12-CU:       ; %bb.0: ; %entry
21130; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
21131; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21132; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21133; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21134; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
21135; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
21136; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
21137; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21138; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
21139; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
21140; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
21141; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
21142; GFX12-CU-NEXT:    s_wait_storecnt 0x0
21143; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21144; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
21145; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
21146; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21147; GFX12-CU-NEXT:    s_endpgm
21148    ptr addrspace(1) %out, i32 %in, i32 %old) {
21149entry:
  ; Address of the i32 at index 4 from %out; this is the location the atomic
  ; operates on.
21150  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; %old is the expected value and %in the replacement. The two trailing
  ; orderings are success (acquire) then failure (seq_cst).
21151  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst
  ; Field 0 of the { i32, i1 } result is the value read from memory; storing it
  ; keeps the returning form of the atomic live.
21152  %val0 = extractvalue { i32, i1 } %val, 0
21153  store i32 %val0, ptr addrspace(1) %out, align 4
21154  ret void
21155}
21156
; Returning cmpxchg on global memory (addrspace(1)) at syncscope
; "agent-one-as", with success ordering release and failure ordering seq_cst.
; The compare-and-swap targets i32 element 4 of %out (the expected code below
; addresses it as a 16-byte offset), and the value it yields is stored back
; to %out.
; The per-target check lines embedded in the parameter list are autogenerated
; (see the NOTE on the first line of this file); regenerate them with the
; update script instead of editing them by hand.
21157define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg(
21158; GFX6-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21159; GFX6:       ; %bb.0: ; %entry
21160; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
21161; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
21162; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
21163; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
21164; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
21165; GFX6-NEXT:    s_mov_b32 s12, s5
21166; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
21167; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
21168; GFX6-NEXT:    s_mov_b32 s11, -1
21169; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
21170; GFX6-NEXT:    s_mov_b32 s5, s12
21171; GFX6-NEXT:    s_mov_b32 s6, s11
21172; GFX6-NEXT:    s_mov_b32 s7, s10
21173; GFX6-NEXT:    v_mov_b32_e32 v0, s9
21174; GFX6-NEXT:    v_mov_b32_e32 v2, s8
21175; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
21176; GFX6-NEXT:    v_mov_b32_e32 v1, v2
21177; GFX6-NEXT:    s_waitcnt vmcnt(0)
21178; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
21179; GFX6-NEXT:    s_waitcnt vmcnt(0)
21180; GFX6-NEXT:    buffer_wbinvl1
21181; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
21182; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
21183; GFX6-NEXT:    s_endpgm
21184;
21185; GFX7-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21186; GFX7:       ; %bb.0: ; %entry
21187; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
21188; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
21189; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
21190; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
21191; GFX7-NEXT:    s_mov_b64 s[12:13], 16
21192; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
21193; GFX7-NEXT:    s_mov_b32 s6, s4
21194; GFX7-NEXT:    s_mov_b32 s7, s5
21195; GFX7-NEXT:    s_mov_b32 s11, s12
21196; GFX7-NEXT:    s_mov_b32 s10, s13
21197; GFX7-NEXT:    s_add_u32 s6, s6, s11
21198; GFX7-NEXT:    s_addc_u32 s10, s7, s10
21199; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
21200; GFX7-NEXT:    s_mov_b32 s7, s10
21201; GFX7-NEXT:    v_mov_b32_e32 v2, s9
21202; GFX7-NEXT:    v_mov_b32_e32 v0, s8
21203; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21204; GFX7-NEXT:    v_mov_b32_e32 v3, v0
21205; GFX7-NEXT:    v_mov_b32_e32 v0, s6
21206; GFX7-NEXT:    v_mov_b32_e32 v1, s7
21207; GFX7-NEXT:    s_waitcnt vmcnt(0)
21208; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
21209; GFX7-NEXT:    s_waitcnt vmcnt(0)
21210; GFX7-NEXT:    buffer_wbinvl1_vol
21211; GFX7-NEXT:    v_mov_b32_e32 v0, s4
21212; GFX7-NEXT:    v_mov_b32_e32 v1, s5
21213; GFX7-NEXT:    flat_store_dword v[0:1], v2
21214; GFX7-NEXT:    s_endpgm
21215;
21216; GFX10-WGP-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21217; GFX10-WGP:       ; %bb.0: ; %entry
21218; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
21219; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21220; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
21221; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
21222; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21223; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
21224; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
21225; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21226; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
21227; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
21228; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21229; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
21230; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
21231; GFX10-WGP-NEXT:    buffer_gl1_inv
21232; GFX10-WGP-NEXT:    buffer_gl0_inv
21233; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
21234; GFX10-WGP-NEXT:    s_endpgm
21235;
21236; GFX10-CU-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21237; GFX10-CU:       ; %bb.0: ; %entry
21238; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
21239; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21240; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
21241; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
21242; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
21243; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
21244; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
21245; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21246; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
21247; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
21248; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21249; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
21250; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
21251; GFX10-CU-NEXT:    buffer_gl1_inv
21252; GFX10-CU-NEXT:    buffer_gl0_inv
21253; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
21254; GFX10-CU-NEXT:    s_endpgm
21255;
21256; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21257; SKIP-CACHE-INV:       ; %bb.0: ; %entry
21258; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
21259; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
21260; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
21261; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
21262; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
21263; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
21264; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
21265; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
21266; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
21267; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
21268; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
21269; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
21270; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
21271; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
21272; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
21273; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
21274; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
21275; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21276; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
21277; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21278; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
21279; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21280; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
21281; SKIP-CACHE-INV-NEXT:    s_endpgm
21282;
21283; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21284; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
21285; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21286; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21287; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21288; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21289; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21290; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21291; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21292; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21293; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21294; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21295; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21296; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21297; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
21298; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21299; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
21300;
21301; GFX90A-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21302; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
21303; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21304; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21305; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21306; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21307; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21308; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21309; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21310; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21311; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21312; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21313; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21314; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21315; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
21316; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21317; GFX90A-TGSPLIT-NEXT:    s_endpgm
21318;
21319; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21320; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
21321; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21322; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21323; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21324; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21325; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21326; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21327; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21328; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21329; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21330; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
21331; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21332; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
21333; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21334; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
21335; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21336; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
21337;
21338; GFX940-TGSPLIT-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21339; GFX940-TGSPLIT:       ; %bb.0: ; %entry
21340; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21341; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21342; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21343; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21344; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21345; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21346; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21347; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21348; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21349; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
21350; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21351; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
21352; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21353; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
21354; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21355; GFX940-TGSPLIT-NEXT:    s_endpgm
21356;
21357; GFX11-WGP-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21358; GFX11-WGP:       ; %bb.0: ; %entry
21359; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
21360; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21361; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21362; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21363; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21364; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
21365; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
21366; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21367; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
21368; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21369; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21370; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21371; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21372; GFX11-WGP-NEXT:    buffer_gl1_inv
21373; GFX11-WGP-NEXT:    buffer_gl0_inv
21374; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21375; GFX11-WGP-NEXT:    s_endpgm
21376;
21377; GFX11-CU-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21378; GFX11-CU:       ; %bb.0: ; %entry
21379; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
21380; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21381; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21382; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21383; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
21384; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
21385; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
21386; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21387; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
21388; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21389; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21390; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21391; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21392; GFX11-CU-NEXT:    buffer_gl1_inv
21393; GFX11-CU-NEXT:    buffer_gl0_inv
21394; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21395; GFX11-CU-NEXT:    s_endpgm
21396;
21397; GFX12-WGP-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21398; GFX12-WGP:       ; %bb.0: ; %entry
21399; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
21400; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21401; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21402; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21403; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
21404; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
21405; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
21406; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21407; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
21408; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
21409; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
21410; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21411; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
21412; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21413; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
21414; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
21415; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21416; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
21417; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21418; GFX12-WGP-NEXT:    s_endpgm
21419;
21420; GFX12-CU-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
21421; GFX12-CU:       ; %bb.0: ; %entry
21422; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
21423; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21424; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21425; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21426; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
21427; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
21428; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
21429; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21430; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
21431; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
21432; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
21433; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
21434; GFX12-CU-NEXT:    s_wait_storecnt 0x0
21435; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21436; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
21437; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
21438; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
21439; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
21440; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21441; GFX12-CU-NEXT:    s_endpgm
21442    ptr addrspace(1) %out, i32 %in, i32 %old) {
21443entry:
  ; Address of the i32 at index 4 from %out; this is the location the atomic
  ; operates on.
21444  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; %old is the expected value and %in the replacement. The two trailing
  ; orderings are success (release) then failure (seq_cst).
21445  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst
  ; Field 0 of the { i32, i1 } result is the value read from memory; storing it
  ; keeps the returning form of the atomic live.
21446  %val0 = extractvalue { i32, i1 } %val, 0
21447  store i32 %val0, ptr addrspace(1) %out, align 4
21448  ret void
21449}
21450
21451define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
21452; GFX6-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21453; GFX6:       ; %bb.0: ; %entry
21454; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
21455; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
21456; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
21457; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
21458; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
21459; GFX6-NEXT:    s_mov_b32 s12, s5
21460; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
21461; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
21462; GFX6-NEXT:    s_mov_b32 s11, -1
21463; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
21464; GFX6-NEXT:    s_mov_b32 s5, s12
21465; GFX6-NEXT:    s_mov_b32 s6, s11
21466; GFX6-NEXT:    s_mov_b32 s7, s10
21467; GFX6-NEXT:    v_mov_b32_e32 v0, s9
21468; GFX6-NEXT:    v_mov_b32_e32 v2, s8
21469; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
21470; GFX6-NEXT:    v_mov_b32_e32 v1, v2
21471; GFX6-NEXT:    s_waitcnt vmcnt(0)
21472; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
21473; GFX6-NEXT:    s_waitcnt vmcnt(0)
21474; GFX6-NEXT:    buffer_wbinvl1
21475; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
21476; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
21477; GFX6-NEXT:    s_endpgm
21478;
21479; GFX7-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21480; GFX7:       ; %bb.0: ; %entry
21481; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
21482; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
21483; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
21484; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
21485; GFX7-NEXT:    s_mov_b64 s[12:13], 16
21486; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
21487; GFX7-NEXT:    s_mov_b32 s6, s4
21488; GFX7-NEXT:    s_mov_b32 s7, s5
21489; GFX7-NEXT:    s_mov_b32 s11, s12
21490; GFX7-NEXT:    s_mov_b32 s10, s13
21491; GFX7-NEXT:    s_add_u32 s6, s6, s11
21492; GFX7-NEXT:    s_addc_u32 s10, s7, s10
21493; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
21494; GFX7-NEXT:    s_mov_b32 s7, s10
21495; GFX7-NEXT:    v_mov_b32_e32 v2, s9
21496; GFX7-NEXT:    v_mov_b32_e32 v0, s8
21497; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21498; GFX7-NEXT:    v_mov_b32_e32 v3, v0
21499; GFX7-NEXT:    v_mov_b32_e32 v0, s6
21500; GFX7-NEXT:    v_mov_b32_e32 v1, s7
21501; GFX7-NEXT:    s_waitcnt vmcnt(0)
21502; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
21503; GFX7-NEXT:    s_waitcnt vmcnt(0)
21504; GFX7-NEXT:    buffer_wbinvl1_vol
21505; GFX7-NEXT:    v_mov_b32_e32 v0, s4
21506; GFX7-NEXT:    v_mov_b32_e32 v1, s5
21507; GFX7-NEXT:    flat_store_dword v[0:1], v2
21508; GFX7-NEXT:    s_endpgm
21509;
21510; GFX10-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21511; GFX10-WGP:       ; %bb.0: ; %entry
21512; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
21513; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21514; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
21515; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
21516; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21517; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
21518; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
21519; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21520; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
21521; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
21522; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21523; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
21524; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
21525; GFX10-WGP-NEXT:    buffer_gl1_inv
21526; GFX10-WGP-NEXT:    buffer_gl0_inv
21527; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
21528; GFX10-WGP-NEXT:    s_endpgm
21529;
21530; GFX10-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21531; GFX10-CU:       ; %bb.0: ; %entry
21532; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
21533; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21534; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
21535; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
21536; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
21537; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
21538; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
21539; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21540; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
21541; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
21542; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21543; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
21544; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
21545; GFX10-CU-NEXT:    buffer_gl1_inv
21546; GFX10-CU-NEXT:    buffer_gl0_inv
21547; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
21548; GFX10-CU-NEXT:    s_endpgm
21549;
21550; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21551; SKIP-CACHE-INV:       ; %bb.0: ; %entry
21552; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
21553; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
21554; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
21555; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
21556; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
21557; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
21558; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
21559; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
21560; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
21561; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
21562; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
21563; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
21564; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
21565; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
21566; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
21567; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
21568; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
21569; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21570; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
21571; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21572; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
21573; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21574; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
21575; SKIP-CACHE-INV-NEXT:    s_endpgm
21576;
21577; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21578; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
21579; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21580; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21581; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21582; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21583; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21584; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21585; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21586; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21587; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21588; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21589; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21590; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21591; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
21592; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21593; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
21594;
21595; GFX90A-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21596; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
21597; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21598; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21599; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21600; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21601; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21602; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21603; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21604; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21605; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21606; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21607; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21608; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21609; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
21610; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21611; GFX90A-TGSPLIT-NEXT:    s_endpgm
21612;
21613; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21614; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
21615; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21616; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21617; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21618; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21619; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21620; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21621; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21622; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21623; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21624; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
21625; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21626; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
21627; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21628; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
21629; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21630; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
21631;
21632; GFX940-TGSPLIT-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21633; GFX940-TGSPLIT:       ; %bb.0: ; %entry
21634; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21635; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21636; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21637; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21638; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21639; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21640; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21641; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21642; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21643; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
21644; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21645; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
21646; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21647; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
21648; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21649; GFX940-TGSPLIT-NEXT:    s_endpgm
21650;
21651; GFX11-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21652; GFX11-WGP:       ; %bb.0: ; %entry
21653; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
21654; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21655; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21656; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21657; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21658; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
21659; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
21660; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21661; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
21662; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21663; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21664; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21665; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21666; GFX11-WGP-NEXT:    buffer_gl1_inv
21667; GFX11-WGP-NEXT:    buffer_gl0_inv
21668; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21669; GFX11-WGP-NEXT:    s_endpgm
21670;
21671; GFX11-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21672; GFX11-CU:       ; %bb.0: ; %entry
21673; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
21674; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21675; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21676; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21677; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
21678; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
21679; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
21680; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21681; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
21682; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21683; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21684; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21685; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21686; GFX11-CU-NEXT:    buffer_gl1_inv
21687; GFX11-CU-NEXT:    buffer_gl0_inv
21688; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21689; GFX11-CU-NEXT:    s_endpgm
21690;
21691; GFX12-WGP-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21692; GFX12-WGP:       ; %bb.0: ; %entry
21693; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
21694; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21695; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21696; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21697; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
21698; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
21699; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
21700; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21701; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
21702; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
21703; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
21704; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21705; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
21706; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21707; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
21708; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
21709; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21710; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
21711; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21712; GFX12-WGP-NEXT:    s_endpgm
21713;
21714; GFX12-CU-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
21715; GFX12-CU:       ; %bb.0: ; %entry
21716; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
21717; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21718; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21719; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21720; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
21721; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
21722; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
21723; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21724; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
21725; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
21726; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
21727; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
21728; GFX12-CU-NEXT:    s_wait_storecnt 0x0
21729; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
21730; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
21731; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
21732; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
21733; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
21734; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21735; GFX12-CU-NEXT:    s_endpgm
21736    ptr addrspace(1) %out, i32 %in, i32 %old) {
21737entry:
21738  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
21739  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst
21740  %val0 = extractvalue { i32, i1 } %val, 0
21741  store i32 %val0, ptr addrspace(1) %out, align 4
21742  ret void
21743}
21744
; Test kernel: value-returning cmpxchg on a global (addrspace(1)) pointer using
; syncscope("agent-one-as") with seq_cst success and seq_cst failure ordering.
; The value read by the cmpxchg is written back through %out so that the
; returning form of the atomic is exercised.
; The per-target expectation lines inside this function are autogenerated (see
; the NOTE on the first line of this file); regenerate them with the update
; script instead of editing them by hand.
21745define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
21746; GFX6-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21747; GFX6:       ; %bb.0: ; %entry
21748; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
21749; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
21750; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
21751; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
21752; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
21753; GFX6-NEXT:    s_mov_b32 s12, s5
21754; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
21755; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
21756; GFX6-NEXT:    s_mov_b32 s11, -1
21757; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
21758; GFX6-NEXT:    s_mov_b32 s5, s12
21759; GFX6-NEXT:    s_mov_b32 s6, s11
21760; GFX6-NEXT:    s_mov_b32 s7, s10
21761; GFX6-NEXT:    v_mov_b32_e32 v0, s9
21762; GFX6-NEXT:    v_mov_b32_e32 v2, s8
21763; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
21764; GFX6-NEXT:    v_mov_b32_e32 v1, v2
21765; GFX6-NEXT:    s_waitcnt vmcnt(0)
21766; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
21767; GFX6-NEXT:    s_waitcnt vmcnt(0)
21768; GFX6-NEXT:    buffer_wbinvl1
21769; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
21770; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
21771; GFX6-NEXT:    s_endpgm
21772;
21773; GFX7-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21774; GFX7:       ; %bb.0: ; %entry
21775; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
21776; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
21777; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
21778; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
21779; GFX7-NEXT:    s_mov_b64 s[12:13], 16
21780; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
21781; GFX7-NEXT:    s_mov_b32 s6, s4
21782; GFX7-NEXT:    s_mov_b32 s7, s5
21783; GFX7-NEXT:    s_mov_b32 s11, s12
21784; GFX7-NEXT:    s_mov_b32 s10, s13
21785; GFX7-NEXT:    s_add_u32 s6, s6, s11
21786; GFX7-NEXT:    s_addc_u32 s10, s7, s10
21787; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
21788; GFX7-NEXT:    s_mov_b32 s7, s10
21789; GFX7-NEXT:    v_mov_b32_e32 v2, s9
21790; GFX7-NEXT:    v_mov_b32_e32 v0, s8
21791; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21792; GFX7-NEXT:    v_mov_b32_e32 v3, v0
21793; GFX7-NEXT:    v_mov_b32_e32 v0, s6
21794; GFX7-NEXT:    v_mov_b32_e32 v1, s7
21795; GFX7-NEXT:    s_waitcnt vmcnt(0)
21796; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
21797; GFX7-NEXT:    s_waitcnt vmcnt(0)
21798; GFX7-NEXT:    buffer_wbinvl1_vol
21799; GFX7-NEXT:    v_mov_b32_e32 v0, s4
21800; GFX7-NEXT:    v_mov_b32_e32 v1, s5
21801; GFX7-NEXT:    flat_store_dword v[0:1], v2
21802; GFX7-NEXT:    s_endpgm
21803;
21804; GFX10-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21805; GFX10-WGP:       ; %bb.0: ; %entry
21806; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
21807; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21808; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
21809; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
21810; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21811; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
21812; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
21813; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21814; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
21815; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
21816; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21817; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
21818; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
21819; GFX10-WGP-NEXT:    buffer_gl1_inv
21820; GFX10-WGP-NEXT:    buffer_gl0_inv
21821; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
21822; GFX10-WGP-NEXT:    s_endpgm
21823;
21824; GFX10-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21825; GFX10-CU:       ; %bb.0: ; %entry
21826; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
21827; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21828; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
21829; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
21830; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
21831; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
21832; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
21833; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21834; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
21835; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
21836; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21837; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
21838; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
21839; GFX10-CU-NEXT:    buffer_gl1_inv
21840; GFX10-CU-NEXT:    buffer_gl0_inv
21841; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
21842; GFX10-CU-NEXT:    s_endpgm
21843;
21844; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21845; SKIP-CACHE-INV:       ; %bb.0: ; %entry
21846; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
21847; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
21848; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
21849; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
21850; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
21851; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
21852; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
21853; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
21854; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
21855; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
21856; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
21857; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
21858; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
21859; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
21860; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
21861; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
21862; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
21863; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21864; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
21865; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21866; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
21867; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21868; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
21869; SKIP-CACHE-INV-NEXT:    s_endpgm
21870;
21871; GFX90A-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21872; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
21873; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21874; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21875; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21876; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21877; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21878; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21879; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21880; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21881; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21882; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21883; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21884; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21885; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
21886; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21887; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
21888;
21889; GFX90A-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21890; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
21891; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21892; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21893; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21894; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21895; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21896; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21897; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21898; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21899; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21900; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21901; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21902; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21903; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
21904; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21905; GFX90A-TGSPLIT-NEXT:    s_endpgm
21906;
21907; GFX940-NOTTGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21908; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
21909; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21910; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21911; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21912; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21913; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21914; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21915; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21916; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21917; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21918; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc1
21919; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21920; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
21921; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21922; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc1
21923; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21924; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
21925;
21926; GFX940-TGSPLIT-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21927; GFX940-TGSPLIT:       ; %bb.0: ; %entry
21928; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21929; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21930; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21931; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21932; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21933; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21934; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21935; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21936; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21937; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc1
21938; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21939; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0
21940; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21941; GFX940-TGSPLIT-NEXT:    buffer_inv sc1
21942; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21943; GFX940-TGSPLIT-NEXT:    s_endpgm
21944;
21945; GFX11-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21946; GFX11-WGP:       ; %bb.0: ; %entry
21947; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
21948; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21949; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21950; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21951; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21952; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
21953; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
21954; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21955; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
21956; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21957; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21958; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21959; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21960; GFX11-WGP-NEXT:    buffer_gl1_inv
21961; GFX11-WGP-NEXT:    buffer_gl0_inv
21962; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21963; GFX11-WGP-NEXT:    s_endpgm
21964;
21965; GFX11-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21966; GFX11-CU:       ; %bb.0: ; %entry
21967; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
21968; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21969; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21970; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21971; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
21972; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
21973; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
21974; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21975; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
21976; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21977; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21978; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21979; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21980; GFX11-CU-NEXT:    buffer_gl1_inv
21981; GFX11-CU-NEXT:    buffer_gl0_inv
21982; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21983; GFX11-CU-NEXT:    s_endpgm
21984;
21985; GFX12-WGP-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
21986; GFX12-WGP:       ; %bb.0: ; %entry
21987; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
21988; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21989; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21990; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21991; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
21992; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
21993; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
21994; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21995; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
21996; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
21997; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
21998; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21999; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
22000; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
22001; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
22002; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
22003; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
22004; GFX12-WGP-NEXT:    global_inv scope:SCOPE_DEV
22005; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
22006; GFX12-WGP-NEXT:    s_endpgm
22007;
22008; GFX12-CU-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
22009; GFX12-CU:       ; %bb.0: ; %entry
22010; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
22011; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
22012; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
22013; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
22014; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
22015; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
22016; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
22017; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
22018; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
22019; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
22020; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
22021; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
22022; GFX12-CU-NEXT:    s_wait_storecnt 0x0
22023; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
22024; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
22025; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
22026; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
22027; GFX12-CU-NEXT:    global_inv scope:SCOPE_DEV
22028; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
22029; GFX12-CU-NEXT:    s_endpgm
22030    ptr addrspace(1) %out, i32 %in, i32 %old) {
22031entry:
  ; Address of i32 element 4 relative to %out, i.e. 16 bytes past %out.
22032  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Volatile compare-and-swap at %gep: compare against %old, write %in on a
  ; match. Success and failure orderings are both seq_cst.
22033  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst
  ; Take element 0 (the i32) of the { i32, i1 } result; the i1 element is not
  ; used by this test.
22034  %val0 = extractvalue { i32, i1 } %val, 0
  ; Plain (non-atomic) store of the extracted value to %out.
22035  store i32 %val0, ptr addrspace(1) %out, align 4
22036  ret void
22037}
22038