xref: /llvm-project/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx600 < %s | FileCheck --check-prefixes=GFX6 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx700 < %s | FileCheck --check-prefixes=GFX7 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10-WGP %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1010 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX10-CU %s
6; RUN: llc -mtriple=amdgcn-amd-amdpal -O0 -mcpu=gfx700 -amdgcn-skip-cache-invalidations < %s | FileCheck --check-prefixes=SKIP-CACHE-INV %s
7; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a < %s | FileCheck --check-prefixes=GFX90A-NOTTGSPLIT %s
8; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx90a -mattr=+tgsplit < %s | FileCheck --check-prefixes=GFX90A-TGSPLIT %s
9; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940-NOTTGSPLIT %s
10; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx940 -mattr=+tgsplit < %s | FileCheck --check-prefixes=GFX940-TGSPLIT %s
11; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GFX11-WGP %s
12; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1100 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX11-CU %s
13; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12-WGP %s
14; RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12-CU %s
15
; global_system_unordered_load: kernel that performs an `unordered` atomic i32
; load (align 4) from the addrspace(1) pointer %in and writes the loaded value
; to the addrspace(1) pointer %out with a plain store. The load carries no
; syncscope qualifier.
;
; The per-target expectations embedded in the parameter list below are
; autogenerated (see the note on the first line of the test); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
;
; In every expected sequence shown here the load instruction has no
; cache-policy modifier, and no cache-invalidate instruction follows it.
16define amdgpu_kernel void @global_system_unordered_load(
17; GFX6-LABEL: global_system_unordered_load:
18; GFX6:       ; %bb.0: ; %entry
19; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
20; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
21; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
22; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
23; GFX6-NEXT:    s_mov_b32 s6, s9
24; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
25; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
26; GFX6-NEXT:    s_mov_b32 s13, -1
27; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
28; GFX6-NEXT:    s_mov_b32 s9, s6
29; GFX6-NEXT:    s_mov_b32 s10, s13
30; GFX6-NEXT:    s_mov_b32 s11, s12
31; GFX6-NEXT:    s_mov_b32 s14, s5
32; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
33; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
34; GFX6-NEXT:    s_mov_b32 s5, s14
35; GFX6-NEXT:    s_mov_b32 s6, s13
36; GFX6-NEXT:    s_mov_b32 s7, s12
37; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0
38; GFX6-NEXT:    s_waitcnt vmcnt(0)
39; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
40; GFX6-NEXT:    s_endpgm
41;
42; GFX7-LABEL: global_system_unordered_load:
43; GFX7:       ; %bb.0: ; %entry
44; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
45; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
46; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
47; GFX7-NEXT:    v_mov_b32_e32 v0, s6
48; GFX7-NEXT:    v_mov_b32_e32 v1, s7
49; GFX7-NEXT:    flat_load_dword v2, v[0:1]
50; GFX7-NEXT:    v_mov_b32_e32 v0, s4
51; GFX7-NEXT:    v_mov_b32_e32 v1, s5
52; GFX7-NEXT:    s_waitcnt vmcnt(0)
53; GFX7-NEXT:    flat_store_dword v[0:1], v2
54; GFX7-NEXT:    s_endpgm
55;
56; GFX10-WGP-LABEL: global_system_unordered_load:
57; GFX10-WGP:       ; %bb.0: ; %entry
58; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
59; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
60; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
61; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
62; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7]
63; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
64; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
65; GFX10-WGP-NEXT:    s_endpgm
66;
67; GFX10-CU-LABEL: global_system_unordered_load:
68; GFX10-CU:       ; %bb.0: ; %entry
69; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
70; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
71; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
72; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
73; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7]
74; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
75; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
76; GFX10-CU-NEXT:    s_endpgm
77;
78; SKIP-CACHE-INV-LABEL: global_system_unordered_load:
79; SKIP-CACHE-INV:       ; %bb.0: ; %entry
80; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
81; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
82; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
83; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
84; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
85; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
86; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
87; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
88; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
89; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
90; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
91; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
92; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
93; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
94; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
95; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
96; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
97; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
98; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0
99; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
100; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
101; SKIP-CACHE-INV-NEXT:    s_endpgm
102;
103; GFX90A-NOTTGSPLIT-LABEL: global_system_unordered_load:
104; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
105; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
106; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
107; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
108; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
109; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
110; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
111; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
112; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
113;
114; GFX90A-TGSPLIT-LABEL: global_system_unordered_load:
115; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
116; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
117; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
118; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
119; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
120; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
121; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
122; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
123; GFX90A-TGSPLIT-NEXT:    s_endpgm
124;
125; GFX940-NOTTGSPLIT-LABEL: global_system_unordered_load:
126; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
127; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
128; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
129; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
130; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
131; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
132; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
133; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
134; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
135;
136; GFX940-TGSPLIT-LABEL: global_system_unordered_load:
137; GFX940-TGSPLIT:       ; %bb.0: ; %entry
138; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
139; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
140; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
141; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
142; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
143; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
144; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
145; GFX940-TGSPLIT-NEXT:    s_endpgm
146;
147; GFX11-WGP-LABEL: global_system_unordered_load:
148; GFX11-WGP:       ; %bb.0: ; %entry
149; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
150; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
151; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
152; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
153; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
154; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
155; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
156; GFX11-WGP-NEXT:    s_endpgm
157;
158; GFX11-CU-LABEL: global_system_unordered_load:
159; GFX11-CU:       ; %bb.0: ; %entry
160; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
161; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
162; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
163; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
164; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
165; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
166; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
167; GFX11-CU-NEXT:    s_endpgm
168;
169; GFX12-WGP-LABEL: global_system_unordered_load:
170; GFX12-WGP:       ; %bb.0: ; %entry
171; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
172; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
173; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
174; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
175; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
176; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
177; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
178; GFX12-WGP-NEXT:    s_endpgm
179;
180; GFX12-CU-LABEL: global_system_unordered_load:
181; GFX12-CU:       ; %bb.0: ; %entry
182; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
183; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
184; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
185; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
186; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
187; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
188; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
189; GFX12-CU-NEXT:    s_endpgm
190    ptr addrspace(1) %in, ptr addrspace(1) %out) {
191entry:
  ; Only the load is atomic (unordered); the store of the result is a plain store.
192  %val = load atomic i32, ptr addrspace(1) %in unordered, align 4
193  store i32 %val, ptr addrspace(1) %out
194  ret void
195}
196
; global_system_monotonic_load: kernel that performs a `monotonic` atomic i32
; load (align 4) from the addrspace(1) pointer %in and writes the loaded value
; to the addrspace(1) pointer %out with a plain store. The load carries no
; syncscope qualifier.
;
; The per-target expectations embedded in the parameter list below are
; autogenerated (see the note on the first line of the test); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
;
; In every expected sequence shown here the load instruction carries a
; cache-policy modifier (glc, glc dlc, sc0 sc1 or scope:SCOPE_SYS, depending on
; the target), and no cache-invalidate instruction follows it.
197define amdgpu_kernel void @global_system_monotonic_load(
198; GFX6-LABEL: global_system_monotonic_load:
199; GFX6:       ; %bb.0: ; %entry
200; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
201; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
202; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
203; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
204; GFX6-NEXT:    s_mov_b32 s6, s9
205; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
206; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
207; GFX6-NEXT:    s_mov_b32 s13, -1
208; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
209; GFX6-NEXT:    s_mov_b32 s9, s6
210; GFX6-NEXT:    s_mov_b32 s10, s13
211; GFX6-NEXT:    s_mov_b32 s11, s12
212; GFX6-NEXT:    s_mov_b32 s14, s5
213; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
214; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
215; GFX6-NEXT:    s_mov_b32 s5, s14
216; GFX6-NEXT:    s_mov_b32 s6, s13
217; GFX6-NEXT:    s_mov_b32 s7, s12
218; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
219; GFX6-NEXT:    s_waitcnt vmcnt(0)
220; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
221; GFX6-NEXT:    s_endpgm
222;
223; GFX7-LABEL: global_system_monotonic_load:
224; GFX7:       ; %bb.0: ; %entry
225; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
226; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
227; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
228; GFX7-NEXT:    v_mov_b32_e32 v0, s6
229; GFX7-NEXT:    v_mov_b32_e32 v1, s7
230; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
231; GFX7-NEXT:    v_mov_b32_e32 v0, s4
232; GFX7-NEXT:    v_mov_b32_e32 v1, s5
233; GFX7-NEXT:    s_waitcnt vmcnt(0)
234; GFX7-NEXT:    flat_store_dword v[0:1], v2
235; GFX7-NEXT:    s_endpgm
236;
237; GFX10-WGP-LABEL: global_system_monotonic_load:
238; GFX10-WGP:       ; %bb.0: ; %entry
239; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
240; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
241; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
242; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
243; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
244; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
245; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
246; GFX10-WGP-NEXT:    s_endpgm
247;
248; GFX10-CU-LABEL: global_system_monotonic_load:
249; GFX10-CU:       ; %bb.0: ; %entry
250; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
251; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
252; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
253; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
254; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
255; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
256; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
257; GFX10-CU-NEXT:    s_endpgm
258;
259; SKIP-CACHE-INV-LABEL: global_system_monotonic_load:
260; SKIP-CACHE-INV:       ; %bb.0: ; %entry
261; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
262; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
263; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
264; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
265; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
266; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
267; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
268; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
269; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
270; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
271; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
272; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
273; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
274; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
275; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
276; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
277; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
278; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
279; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
280; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
281; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
282; SKIP-CACHE-INV-NEXT:    s_endpgm
283;
284; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_load:
285; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
286; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
287; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
288; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
289; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
290; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
291; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
292; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
293; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
294;
295; GFX90A-TGSPLIT-LABEL: global_system_monotonic_load:
296; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
297; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
298; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
299; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
300; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
301; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
302; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
303; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
304; GFX90A-TGSPLIT-NEXT:    s_endpgm
305;
306; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_load:
307; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
308; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
309; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
310; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
311; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
312; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
313; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
314; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
315; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
316;
317; GFX940-TGSPLIT-LABEL: global_system_monotonic_load:
318; GFX940-TGSPLIT:       ; %bb.0: ; %entry
319; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
320; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
321; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
322; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
323; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
324; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
325; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
326; GFX940-TGSPLIT-NEXT:    s_endpgm
327;
328; GFX11-WGP-LABEL: global_system_monotonic_load:
329; GFX11-WGP:       ; %bb.0: ; %entry
330; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
331; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
332; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
333; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
334; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
335; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
336; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
337; GFX11-WGP-NEXT:    s_endpgm
338;
339; GFX11-CU-LABEL: global_system_monotonic_load:
340; GFX11-CU:       ; %bb.0: ; %entry
341; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
342; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
343; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
344; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
345; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
346; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
347; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
348; GFX11-CU-NEXT:    s_endpgm
349;
350; GFX12-WGP-LABEL: global_system_monotonic_load:
351; GFX12-WGP:       ; %bb.0: ; %entry
352; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
353; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
354; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
355; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
356; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
357; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
358; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
359; GFX12-WGP-NEXT:    s_endpgm
360;
361; GFX12-CU-LABEL: global_system_monotonic_load:
362; GFX12-CU:       ; %bb.0: ; %entry
363; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
364; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
365; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
366; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
367; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
368; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
369; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
370; GFX12-CU-NEXT:    s_endpgm
371    ptr addrspace(1) %in, ptr addrspace(1) %out) {
372entry:
  ; Only the load is atomic (monotonic); the store of the result is a plain store.
373  %val = load atomic i32, ptr addrspace(1) %in monotonic, align 4
374  store i32 %val, ptr addrspace(1) %out
375  ret void
376}
377
; global_system_acquire_load: kernel that performs an `acquire` atomic i32
; load (align 4) from the addrspace(1) pointer %in and writes the loaded value
; to the addrspace(1) pointer %out with a plain store. The load carries no
; syncscope qualifier.
;
; The per-target expectations embedded in the parameter list below are
; autogenerated (see the note on the first line of the test); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
;
; In the expected sequences shown here the load carries a cache-policy modifier
; and the wait that follows it (s_waitcnt vmcnt(0) or s_wait_loadcnt 0x0) is
; placed before the store. That wait is followed by one or two
; cache-invalidate instructions (buffer_wbinvl1, buffer_wbinvl1_vol,
; buffer_gl1_inv + buffer_gl0_inv, buffer_invl2 + buffer_wbinvl1_vol,
; buffer_inv sc0 sc1 or global_inv scope:SCOPE_SYS, depending on the target).
; The one exception is the run that passes -amdgcn-skip-cache-invalidations,
; whose expected output has no invalidate instruction between the wait and the
; store.
378define amdgpu_kernel void @global_system_acquire_load(
379; GFX6-LABEL: global_system_acquire_load:
380; GFX6:       ; %bb.0: ; %entry
381; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
382; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
383; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
384; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
385; GFX6-NEXT:    s_mov_b32 s6, s9
386; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
387; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
388; GFX6-NEXT:    s_mov_b32 s13, -1
389; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
390; GFX6-NEXT:    s_mov_b32 s9, s6
391; GFX6-NEXT:    s_mov_b32 s10, s13
392; GFX6-NEXT:    s_mov_b32 s11, s12
393; GFX6-NEXT:    s_mov_b32 s14, s5
394; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
395; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
396; GFX6-NEXT:    s_mov_b32 s5, s14
397; GFX6-NEXT:    s_mov_b32 s6, s13
398; GFX6-NEXT:    s_mov_b32 s7, s12
399; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
400; GFX6-NEXT:    s_waitcnt vmcnt(0)
401; GFX6-NEXT:    buffer_wbinvl1
402; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
403; GFX6-NEXT:    s_endpgm
404;
405; GFX7-LABEL: global_system_acquire_load:
406; GFX7:       ; %bb.0: ; %entry
407; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
408; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
409; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
410; GFX7-NEXT:    v_mov_b32_e32 v0, s6
411; GFX7-NEXT:    v_mov_b32_e32 v1, s7
412; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
413; GFX7-NEXT:    s_waitcnt vmcnt(0)
414; GFX7-NEXT:    buffer_wbinvl1_vol
415; GFX7-NEXT:    v_mov_b32_e32 v0, s4
416; GFX7-NEXT:    v_mov_b32_e32 v1, s5
417; GFX7-NEXT:    flat_store_dword v[0:1], v2
418; GFX7-NEXT:    s_endpgm
419;
420; GFX10-WGP-LABEL: global_system_acquire_load:
421; GFX10-WGP:       ; %bb.0: ; %entry
422; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
423; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
424; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
425; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
426; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
427; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
428; GFX10-WGP-NEXT:    buffer_gl1_inv
429; GFX10-WGP-NEXT:    buffer_gl0_inv
430; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
431; GFX10-WGP-NEXT:    s_endpgm
432;
433; GFX10-CU-LABEL: global_system_acquire_load:
434; GFX10-CU:       ; %bb.0: ; %entry
435; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
436; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
437; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
438; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
439; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
440; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
441; GFX10-CU-NEXT:    buffer_gl1_inv
442; GFX10-CU-NEXT:    buffer_gl0_inv
443; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
444; GFX10-CU-NEXT:    s_endpgm
445;
446; SKIP-CACHE-INV-LABEL: global_system_acquire_load:
447; SKIP-CACHE-INV:       ; %bb.0: ; %entry
448; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
449; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
450; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
451; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
452; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
453; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
454; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
455; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
456; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
457; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
458; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
459; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
460; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
461; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
462; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
463; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
464; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
465; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
466; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
467; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
468; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
469; SKIP-CACHE-INV-NEXT:    s_endpgm
470;
471; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_load:
472; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
473; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
474; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
475; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
476; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
477; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
478; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
479; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
480; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
481; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
482; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
483;
484; GFX90A-TGSPLIT-LABEL: global_system_acquire_load:
485; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
486; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
487; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
488; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
489; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
490; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
491; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
492; GFX90A-TGSPLIT-NEXT:    buffer_invl2
493; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
494; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
495; GFX90A-TGSPLIT-NEXT:    s_endpgm
496;
497; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_load:
498; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
499; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
500; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
501; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
502; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
503; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
504; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
505; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
506; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
507; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
508;
509; GFX940-TGSPLIT-LABEL: global_system_acquire_load:
510; GFX940-TGSPLIT:       ; %bb.0: ; %entry
511; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
512; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
513; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
514; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
515; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
516; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
517; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
518; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
519; GFX940-TGSPLIT-NEXT:    s_endpgm
520;
521; GFX11-WGP-LABEL: global_system_acquire_load:
522; GFX11-WGP:       ; %bb.0: ; %entry
523; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
524; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
525; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
526; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
527; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
528; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
529; GFX11-WGP-NEXT:    buffer_gl1_inv
530; GFX11-WGP-NEXT:    buffer_gl0_inv
531; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
532; GFX11-WGP-NEXT:    s_endpgm
533;
534; GFX11-CU-LABEL: global_system_acquire_load:
535; GFX11-CU:       ; %bb.0: ; %entry
536; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
537; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
538; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
539; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
540; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
541; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
542; GFX11-CU-NEXT:    buffer_gl1_inv
543; GFX11-CU-NEXT:    buffer_gl0_inv
544; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
545; GFX11-CU-NEXT:    s_endpgm
546;
547; GFX12-WGP-LABEL: global_system_acquire_load:
548; GFX12-WGP:       ; %bb.0: ; %entry
549; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
550; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
551; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
552; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
553; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
554; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
555; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
556; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
557; GFX12-WGP-NEXT:    s_endpgm
558;
559; GFX12-CU-LABEL: global_system_acquire_load:
560; GFX12-CU:       ; %bb.0: ; %entry
561; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
562; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
563; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
564; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
565; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
566; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
567; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
568; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
569; GFX12-CU-NEXT:    s_endpgm
570    ptr addrspace(1) %in, ptr addrspace(1) %out) {
571entry:
  ; Only the load is atomic (acquire); the store of the result is a plain store.
572  %val = load atomic i32, ptr addrspace(1) %in acquire, align 4
573  store i32 %val, ptr addrspace(1) %out
574  ret void
575}
576
577define amdgpu_kernel void @global_system_seq_cst_load(
578; GFX6-LABEL: global_system_seq_cst_load:
579; GFX6:       ; %bb.0: ; %entry
580; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
581; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
582; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
583; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
584; GFX6-NEXT:    s_mov_b32 s6, s9
585; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
586; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
587; GFX6-NEXT:    s_mov_b32 s13, -1
588; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
589; GFX6-NEXT:    s_mov_b32 s9, s6
590; GFX6-NEXT:    s_mov_b32 s10, s13
591; GFX6-NEXT:    s_mov_b32 s11, s12
592; GFX6-NEXT:    s_mov_b32 s14, s5
593; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
594; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
595; GFX6-NEXT:    s_mov_b32 s5, s14
596; GFX6-NEXT:    s_mov_b32 s6, s13
597; GFX6-NEXT:    s_mov_b32 s7, s12
598; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
599; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
600; GFX6-NEXT:    s_waitcnt vmcnt(0)
601; GFX6-NEXT:    buffer_wbinvl1
602; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
603; GFX6-NEXT:    s_endpgm
604;
605; GFX7-LABEL: global_system_seq_cst_load:
606; GFX7:       ; %bb.0: ; %entry
607; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
608; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
609; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
610; GFX7-NEXT:    v_mov_b32_e32 v0, s6
611; GFX7-NEXT:    v_mov_b32_e32 v1, s7
612; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
613; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
614; GFX7-NEXT:    s_waitcnt vmcnt(0)
615; GFX7-NEXT:    buffer_wbinvl1_vol
616; GFX7-NEXT:    v_mov_b32_e32 v0, s4
617; GFX7-NEXT:    v_mov_b32_e32 v1, s5
618; GFX7-NEXT:    flat_store_dword v[0:1], v2
619; GFX7-NEXT:    s_endpgm
620;
621; GFX10-WGP-LABEL: global_system_seq_cst_load:
622; GFX10-WGP:       ; %bb.0: ; %entry
623; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
624; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
625; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
626; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
627; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
628; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
629; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
630; GFX10-WGP-NEXT:    buffer_gl1_inv
631; GFX10-WGP-NEXT:    buffer_gl0_inv
632; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
633; GFX10-WGP-NEXT:    s_endpgm
634;
635; GFX10-CU-LABEL: global_system_seq_cst_load:
636; GFX10-CU:       ; %bb.0: ; %entry
637; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
638; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
639; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
640; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
641; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
642; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
643; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
644; GFX10-CU-NEXT:    buffer_gl1_inv
645; GFX10-CU-NEXT:    buffer_gl0_inv
646; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
647; GFX10-CU-NEXT:    s_endpgm
648;
649; SKIP-CACHE-INV-LABEL: global_system_seq_cst_load:
650; SKIP-CACHE-INV:       ; %bb.0: ; %entry
651; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
652; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
653; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
654; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
655; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
656; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
657; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
658; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
659; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
660; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
661; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
662; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
663; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
664; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
665; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
666; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
667; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
668; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
669; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
670; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
671; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
672; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
673; SKIP-CACHE-INV-NEXT:    s_endpgm
674;
675; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_load:
676; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
677; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
678; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
679; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
680; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
681; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
682; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
683; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
684; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
685; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
686; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
687;
688; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_load:
689; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
690; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
691; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
692; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
693; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
694; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
695; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
696; GFX90A-TGSPLIT-NEXT:    buffer_invl2
697; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
698; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
699; GFX90A-TGSPLIT-NEXT:    s_endpgm
700;
701; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_load:
702; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
703; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
704; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
705; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
706; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
707; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
708; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
709; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
710; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
711; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
712;
713; GFX940-TGSPLIT-LABEL: global_system_seq_cst_load:
714; GFX940-TGSPLIT:       ; %bb.0: ; %entry
715; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
716; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
717; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
718; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
719; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
720; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
721; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
722; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
723; GFX940-TGSPLIT-NEXT:    s_endpgm
724;
725; GFX11-WGP-LABEL: global_system_seq_cst_load:
726; GFX11-WGP:       ; %bb.0: ; %entry
727; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
728; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
729; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
730; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
731; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
732; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
733; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
734; GFX11-WGP-NEXT:    buffer_gl1_inv
735; GFX11-WGP-NEXT:    buffer_gl0_inv
736; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
737; GFX11-WGP-NEXT:    s_endpgm
738;
739; GFX11-CU-LABEL: global_system_seq_cst_load:
740; GFX11-CU:       ; %bb.0: ; %entry
741; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
742; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
743; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
744; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
745; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
746; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
747; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
748; GFX11-CU-NEXT:    buffer_gl1_inv
749; GFX11-CU-NEXT:    buffer_gl0_inv
750; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
751; GFX11-CU-NEXT:    s_endpgm
752;
753; GFX12-WGP-LABEL: global_system_seq_cst_load:
754; GFX12-WGP:       ; %bb.0: ; %entry
755; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
756; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
757; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
758; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
759; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
760; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
761; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
762; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
763; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
764; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
765; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
766; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
767; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
768; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
769; GFX12-WGP-NEXT:    s_endpgm
770;
771; GFX12-CU-LABEL: global_system_seq_cst_load:
772; GFX12-CU:       ; %bb.0: ; %entry
773; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
774; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
775; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
776; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
777; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
778; GFX12-CU-NEXT:    s_wait_storecnt 0x0
779; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
780; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
781; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
782; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
783; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
784; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
785; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
786; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
787; GFX12-CU-NEXT:    s_endpgm
788    ptr addrspace(1) %in, ptr addrspace(1) %out) {
789entry:
790  %val = load atomic i32, ptr addrspace(1) %in seq_cst, align 4
791  store i32 %val, ptr addrspace(1) %out
792  ret void
793}
794
; Test kernel: stores the i32 argument %in to the addrspace(1) pointer %out
; using an atomic store with `unordered` ordering and no explicit syncscope.
; The autogenerated assertions below show that every RUN configuration emits
; a single store once the scalar loads have been waited on, with no extra
; waits or cache write-backs in front of it; the gfx940 runs additionally
; carry sc0 sc1 on the store instruction.
795define amdgpu_kernel void @global_system_unordered_store(
796; GFX6-LABEL: global_system_unordered_store:
797; GFX6:       ; %bb.0: ; %entry
798; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
799; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
800; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
801; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
802; GFX6-NEXT:    s_mov_b32 s11, s5
803; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
804; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
805; GFX6-NEXT:    s_mov_b32 s10, -1
806; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
807; GFX6-NEXT:    s_mov_b32 s5, s11
808; GFX6-NEXT:    s_mov_b32 s6, s10
809; GFX6-NEXT:    s_mov_b32 s7, s9
810; GFX6-NEXT:    v_mov_b32_e32 v0, s8
811; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
812; GFX6-NEXT:    s_endpgm
813;
814; GFX7-LABEL: global_system_unordered_store:
815; GFX7:       ; %bb.0: ; %entry
816; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
817; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
818; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
819; GFX7-NEXT:    v_mov_b32_e32 v0, s6
820; GFX7-NEXT:    v_mov_b32_e32 v1, s7
821; GFX7-NEXT:    v_mov_b32_e32 v2, s4
822; GFX7-NEXT:    flat_store_dword v[0:1], v2
823; GFX7-NEXT:    s_endpgm
824;
825; GFX10-WGP-LABEL: global_system_unordered_store:
826; GFX10-WGP:       ; %bb.0: ; %entry
827; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
828; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
829; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
830; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
831; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
832; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
833; GFX10-WGP-NEXT:    s_endpgm
834;
835; GFX10-CU-LABEL: global_system_unordered_store:
836; GFX10-CU:       ; %bb.0: ; %entry
837; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
838; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
839; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
840; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
841; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
842; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
843; GFX10-CU-NEXT:    s_endpgm
844;
845; SKIP-CACHE-INV-LABEL: global_system_unordered_store:
846; SKIP-CACHE-INV:       ; %bb.0: ; %entry
847; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
848; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
849; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
850; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
851; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
852; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
853; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
854; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
855; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
856; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
857; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
858; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
859; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
860; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
861; SKIP-CACHE-INV-NEXT:    s_endpgm
862;
863; GFX90A-NOTTGSPLIT-LABEL: global_system_unordered_store:
864; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
865; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
866; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
867; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
868; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
869; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
870; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
871; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
872;
873; GFX90A-TGSPLIT-LABEL: global_system_unordered_store:
874; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
875; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
876; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
877; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
878; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
879; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
880; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
881; GFX90A-TGSPLIT-NEXT:    s_endpgm
882;
883; GFX940-NOTTGSPLIT-LABEL: global_system_unordered_store:
884; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
885; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
886; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
887; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
888; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
889; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
890; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
891; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
892;
893; GFX940-TGSPLIT-LABEL: global_system_unordered_store:
894; GFX940-TGSPLIT:       ; %bb.0: ; %entry
895; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
896; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
897; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
898; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
899; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
900; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
901; GFX940-TGSPLIT-NEXT:    s_endpgm
902;
903; GFX11-WGP-LABEL: global_system_unordered_store:
904; GFX11-WGP:       ; %bb.0: ; %entry
905; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
906; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
907; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
908; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
909; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
910; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
911; GFX11-WGP-NEXT:    s_endpgm
912;
913; GFX11-CU-LABEL: global_system_unordered_store:
914; GFX11-CU:       ; %bb.0: ; %entry
915; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
916; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
917; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
918; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
919; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
920; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
921; GFX11-CU-NEXT:    s_endpgm
922;
923; GFX12-WGP-LABEL: global_system_unordered_store:
924; GFX12-WGP:       ; %bb.0: ; %entry
925; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
926; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
927; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
928; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
929; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
930; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
931; GFX12-WGP-NEXT:    s_endpgm
932;
933; GFX12-CU-LABEL: global_system_unordered_store:
934; GFX12-CU:       ; %bb.0: ; %entry
935; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
936; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
937; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
938; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
939; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
940; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
941; GFX12-CU-NEXT:    s_endpgm
942    i32 %in, ptr addrspace(1) %out) {
943entry:
944  store atomic i32 %in, ptr addrspace(1) %out unordered, align 4
945  ret void
946}
947
; Test kernel: stores the i32 argument %in to the addrspace(1) pointer %out
; using an atomic store with `monotonic` ordering and no explicit syncscope.
; The autogenerated assertions below show that every RUN configuration emits
; a single store once the scalar loads have been waited on, with no extra
; waits or cache write-backs in front of it; the gfx940 runs carry sc0 sc1
; and the gfx1200 runs carry scope:SCOPE_SYS on the store instruction.
948define amdgpu_kernel void @global_system_monotonic_store(
949; GFX6-LABEL: global_system_monotonic_store:
950; GFX6:       ; %bb.0: ; %entry
951; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
952; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
953; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
954; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
955; GFX6-NEXT:    s_mov_b32 s11, s5
956; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
957; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
958; GFX6-NEXT:    s_mov_b32 s10, -1
959; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
960; GFX6-NEXT:    s_mov_b32 s5, s11
961; GFX6-NEXT:    s_mov_b32 s6, s10
962; GFX6-NEXT:    s_mov_b32 s7, s9
963; GFX6-NEXT:    v_mov_b32_e32 v0, s8
964; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
965; GFX6-NEXT:    s_endpgm
966;
967; GFX7-LABEL: global_system_monotonic_store:
968; GFX7:       ; %bb.0: ; %entry
969; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
970; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
971; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
972; GFX7-NEXT:    v_mov_b32_e32 v0, s6
973; GFX7-NEXT:    v_mov_b32_e32 v1, s7
974; GFX7-NEXT:    v_mov_b32_e32 v2, s4
975; GFX7-NEXT:    flat_store_dword v[0:1], v2
976; GFX7-NEXT:    s_endpgm
977;
978; GFX10-WGP-LABEL: global_system_monotonic_store:
979; GFX10-WGP:       ; %bb.0: ; %entry
980; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
981; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
982; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
983; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
984; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
985; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
986; GFX10-WGP-NEXT:    s_endpgm
987;
988; GFX10-CU-LABEL: global_system_monotonic_store:
989; GFX10-CU:       ; %bb.0: ; %entry
990; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
991; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
992; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
993; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
994; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
995; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
996; GFX10-CU-NEXT:    s_endpgm
997;
998; SKIP-CACHE-INV-LABEL: global_system_monotonic_store:
999; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1000; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
1001; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
1002; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
1003; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1004; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1005; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1006; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1007; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1008; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1009; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1010; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1011; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1012; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1013; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1014; SKIP-CACHE-INV-NEXT:    s_endpgm
1015;
1016; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_store:
1017; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1018; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1019; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1020; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1021; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1022; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1023; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1024; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1025;
1026; GFX90A-TGSPLIT-LABEL: global_system_monotonic_store:
1027; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1028; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1029; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1030; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1031; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1032; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1033; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1034; GFX90A-TGSPLIT-NEXT:    s_endpgm
1035;
1036; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_store:
1037; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1038; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1039; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1040; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1041; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1042; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1043; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1044; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1045;
1046; GFX940-TGSPLIT-LABEL: global_system_monotonic_store:
1047; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1048; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1049; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1050; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1051; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1052; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1053; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1054; GFX940-TGSPLIT-NEXT:    s_endpgm
1055;
1056; GFX11-WGP-LABEL: global_system_monotonic_store:
1057; GFX11-WGP:       ; %bb.0: ; %entry
1058; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1059; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1060; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1061; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1062; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1063; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
1064; GFX11-WGP-NEXT:    s_endpgm
1065;
1066; GFX11-CU-LABEL: global_system_monotonic_store:
1067; GFX11-CU:       ; %bb.0: ; %entry
1068; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1069; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1070; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1071; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1072; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1073; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
1074; GFX11-CU-NEXT:    s_endpgm
1075;
1076; GFX12-WGP-LABEL: global_system_monotonic_store:
1077; GFX12-WGP:       ; %bb.0: ; %entry
1078; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1079; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1080; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1081; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1082; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1083; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1084; GFX12-WGP-NEXT:    s_endpgm
1085;
1086; GFX12-CU-LABEL: global_system_monotonic_store:
1087; GFX12-CU:       ; %bb.0: ; %entry
1088; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1089; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1090; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1091; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1092; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1093; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1094; GFX12-CU-NEXT:    s_endpgm
1095    i32 %in, ptr addrspace(1) %out) {
1096entry:
1097  store atomic i32 %in, ptr addrspace(1) %out monotonic, align 4
1098  ret void
1099}
1100
; Test kernel: stores the i32 argument %in to the addrspace(1) pointer %out
; using an atomic store with `release` ordering and no explicit syncscope.
; The autogenerated assertions below show what each RUN configuration places
; immediately before the store:
;   - gfx600, gfx700 and the amdpal run: s_waitcnt vmcnt(0) lgkmcnt(0)
;   - gfx1010 and gfx1100: that wait followed by s_waitcnt_vscnt null, 0x0
;   - gfx90a: buffer_wbl2 and then the wait
;   - gfx940: buffer_wbl2 sc0 sc1 and then the wait; the store carries sc0 sc1
;   - gfx1200: global_wb scope:SCOPE_SYS followed by s_wait_bvhcnt,
;     s_wait_samplecnt, s_wait_storecnt and s_wait_loadcnt_dscnt; the store
;     carries scope:SCOPE_SYS
1101define amdgpu_kernel void @global_system_release_store(
1102; GFX6-LABEL: global_system_release_store:
1103; GFX6:       ; %bb.0: ; %entry
1104; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
1105; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
1106; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
1107; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1108; GFX6-NEXT:    s_mov_b32 s11, s5
1109; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1110; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1111; GFX6-NEXT:    s_mov_b32 s10, -1
1112; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1113; GFX6-NEXT:    s_mov_b32 s5, s11
1114; GFX6-NEXT:    s_mov_b32 s6, s10
1115; GFX6-NEXT:    s_mov_b32 s7, s9
1116; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1117; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1118; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1119; GFX6-NEXT:    s_endpgm
1120;
1121; GFX7-LABEL: global_system_release_store:
1122; GFX7:       ; %bb.0: ; %entry
1123; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
1124; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
1125; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1126; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1127; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1128; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1129; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1130; GFX7-NEXT:    flat_store_dword v[0:1], v2
1131; GFX7-NEXT:    s_endpgm
1132;
1133; GFX10-WGP-LABEL: global_system_release_store:
1134; GFX10-WGP:       ; %bb.0: ; %entry
1135; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
1136; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1137; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1138; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1139; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1140; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1141; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1142; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
1143; GFX10-WGP-NEXT:    s_endpgm
1144;
1145; GFX10-CU-LABEL: global_system_release_store:
1146; GFX10-CU:       ; %bb.0: ; %entry
1147; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
1148; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1149; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1150; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1151; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1152; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1153; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1154; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
1155; GFX10-CU-NEXT:    s_endpgm
1156;
1157; SKIP-CACHE-INV-LABEL: global_system_release_store:
1158; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1159; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
1160; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
1161; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
1162; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1163; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1164; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1165; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1166; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1167; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1168; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1169; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1170; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1171; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1172; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1173; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1174; SKIP-CACHE-INV-NEXT:    s_endpgm
1175;
1176; GFX90A-NOTTGSPLIT-LABEL: global_system_release_store:
1177; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1178; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1179; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1180; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1181; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1182; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1183; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
1184; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1185; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1186; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1187;
1188; GFX90A-TGSPLIT-LABEL: global_system_release_store:
1189; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1190; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1191; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1192; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1193; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1194; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1195; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
1196; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1197; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1198; GFX90A-TGSPLIT-NEXT:    s_endpgm
1199;
1200; GFX940-NOTTGSPLIT-LABEL: global_system_release_store:
1201; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1202; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1203; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1204; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1205; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1206; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1207; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
1208; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1209; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1210; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1211;
1212; GFX940-TGSPLIT-LABEL: global_system_release_store:
1213; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1214; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1215; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1216; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1217; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1218; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1219; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
1220; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1221; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1222; GFX940-TGSPLIT-NEXT:    s_endpgm
1223;
1224; GFX11-WGP-LABEL: global_system_release_store:
1225; GFX11-WGP:       ; %bb.0: ; %entry
1226; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1227; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1228; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1229; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1230; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1231; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1232; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1233; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
1234; GFX11-WGP-NEXT:    s_endpgm
1235;
1236; GFX11-CU-LABEL: global_system_release_store:
1237; GFX11-CU:       ; %bb.0: ; %entry
1238; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1239; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1240; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1241; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1242; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1243; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1244; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1245; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
1246; GFX11-CU-NEXT:    s_endpgm
1247;
1248; GFX12-WGP-LABEL: global_system_release_store:
1249; GFX12-WGP:       ; %bb.0: ; %entry
1250; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1251; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1252; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1253; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1254; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1255; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
1256; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
1257; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
1258; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
1259; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
1260; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1261; GFX12-WGP-NEXT:    s_endpgm
1262;
1263; GFX12-CU-LABEL: global_system_release_store:
1264; GFX12-CU:       ; %bb.0: ; %entry
1265; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1266; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1267; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1268; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1269; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1270; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
1271; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
1272; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
1273; GFX12-CU-NEXT:    s_wait_storecnt 0x0
1274; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
1275; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1276; GFX12-CU-NEXT:    s_endpgm
1277    i32 %in, ptr addrspace(1) %out) {
1278entry:
1279  store atomic i32 %in, ptr addrspace(1) %out release, align 4
1280  ret void
1281}
1282
1283define amdgpu_kernel void @global_system_seq_cst_store(
1284; GFX6-LABEL: global_system_seq_cst_store:
1285; GFX6:       ; %bb.0: ; %entry
1286; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
1287; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
1288; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
1289; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1290; GFX6-NEXT:    s_mov_b32 s11, s5
1291; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1292; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1293; GFX6-NEXT:    s_mov_b32 s10, -1
1294; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1295; GFX6-NEXT:    s_mov_b32 s5, s11
1296; GFX6-NEXT:    s_mov_b32 s6, s10
1297; GFX6-NEXT:    s_mov_b32 s7, s9
1298; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1299; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1300; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1301; GFX6-NEXT:    s_endpgm
1302;
1303; GFX7-LABEL: global_system_seq_cst_store:
1304; GFX7:       ; %bb.0: ; %entry
1305; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
1306; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
1307; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1308; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1309; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1310; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1311; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1312; GFX7-NEXT:    flat_store_dword v[0:1], v2
1313; GFX7-NEXT:    s_endpgm
1314;
1315; GFX10-WGP-LABEL: global_system_seq_cst_store:
1316; GFX10-WGP:       ; %bb.0: ; %entry
1317; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
1318; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1319; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1320; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1321; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1322; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1323; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1324; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
1325; GFX10-WGP-NEXT:    s_endpgm
1326;
1327; GFX10-CU-LABEL: global_system_seq_cst_store:
1328; GFX10-CU:       ; %bb.0: ; %entry
1329; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
1330; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1331; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1332; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1333; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1334; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1335; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1336; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
1337; GFX10-CU-NEXT:    s_endpgm
1338;
1339; SKIP-CACHE-INV-LABEL: global_system_seq_cst_store:
1340; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1341; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
1342; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
1343; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
1344; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1345; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1346; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1347; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1348; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1349; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1350; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1351; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1352; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1353; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1354; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1355; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1356; SKIP-CACHE-INV-NEXT:    s_endpgm
1357;
1358; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_store:
1359; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1360; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1361; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1362; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1363; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1364; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1365; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
1366; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1367; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1368; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1369;
1370; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_store:
1371; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1372; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
1373; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
1374; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1375; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1376; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1377; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
1378; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1379; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
1380; GFX90A-TGSPLIT-NEXT:    s_endpgm
1381;
1382; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_store:
1383; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1384; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1385; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1386; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1387; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1388; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1389; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
1390; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1391; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1392; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1393;
1394; GFX940-TGSPLIT-LABEL: global_system_seq_cst_store:
1395; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1396; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
1397; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
1398; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1399; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1400; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1401; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
1402; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1403; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
1404; GFX940-TGSPLIT-NEXT:    s_endpgm
1405;
1406; GFX11-WGP-LABEL: global_system_seq_cst_store:
1407; GFX11-WGP:       ; %bb.0: ; %entry
1408; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1409; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1410; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1411; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1412; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1413; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1414; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1415; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
1416; GFX11-WGP-NEXT:    s_endpgm
1417;
1418; GFX11-CU-LABEL: global_system_seq_cst_store:
1419; GFX11-CU:       ; %bb.0: ; %entry
1420; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1421; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1422; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1423; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1424; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1425; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1426; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1427; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
1428; GFX11-CU-NEXT:    s_endpgm
1429;
1430; GFX12-WGP-LABEL: global_system_seq_cst_store:
1431; GFX12-WGP:       ; %bb.0: ; %entry
1432; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
1433; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1434; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1435; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1436; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1437; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
1438; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
1439; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
1440; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
1441; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
1442; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1443; GFX12-WGP-NEXT:    s_endpgm
1444;
1445; GFX12-CU-LABEL: global_system_seq_cst_store:
1446; GFX12-CU:       ; %bb.0: ; %entry
1447; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
1448; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
1449; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1450; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1451; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1452; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
1453; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
1454; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
1455; GFX12-CU-NEXT:    s_wait_storecnt 0x0
1456; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
1457; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1458; GFX12-CU-NEXT:    s_endpgm
1459    i32 %in, ptr addrspace(1) %out) {
1460entry:
1461  store atomic i32 %in, ptr addrspace(1) %out seq_cst, align 4
1462  ret void
1463}
1464
; Monotonic atomicrmw xchg on a global (addrspace(1)) pointer, with no
; syncscope operand on the instruction. In every run configuration the
; expected code below is only the kernel-argument loads, one wait for those
; scalar loads, the register moves, and a single swap atomic; no additional
; waits, cache write-backs or cache invalidates are expected around the atomic.
; The gfx940 swap carries "sc1" and the gfx1200 swap carries "scope:SCOPE_SYS".
; The assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1465define amdgpu_kernel void @global_system_monotonic_atomicrmw(
1466; GFX6-LABEL: global_system_monotonic_atomicrmw:
1467; GFX6:       ; %bb.0: ; %entry
1468; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1469; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
1470; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1471; GFX6-NEXT:    s_mov_b32 s11, s5
1472; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1473; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1474; GFX6-NEXT:    s_mov_b32 s10, -1
1475; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1476; GFX6-NEXT:    s_mov_b32 s5, s11
1477; GFX6-NEXT:    s_mov_b32 s6, s10
1478; GFX6-NEXT:    s_mov_b32 s7, s9
1479; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1480; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
1481; GFX6-NEXT:    s_endpgm
1482;
1483; GFX7-LABEL: global_system_monotonic_atomicrmw:
1484; GFX7:       ; %bb.0: ; %entry
1485; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
1486; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
1487; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1488; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1489; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1490; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1491; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
1492; GFX7-NEXT:    s_endpgm
1493;
1494; GFX10-WGP-LABEL: global_system_monotonic_atomicrmw:
1495; GFX10-WGP:       ; %bb.0: ; %entry
1496; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1497; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1498; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
1499; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1500; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1501; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
1502; GFX10-WGP-NEXT:    s_endpgm
1503;
1504; GFX10-CU-LABEL: global_system_monotonic_atomicrmw:
1505; GFX10-CU:       ; %bb.0: ; %entry
1506; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1507; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1508; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
1509; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1510; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1511; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
1512; GFX10-CU-NEXT:    s_endpgm
1513;
1514; SKIP-CACHE-INV-LABEL: global_system_monotonic_atomicrmw:
1515; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1516; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1517; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
1518; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1519; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1520; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1521; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1522; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1523; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1524; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1525; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1526; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1527; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1528; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
1529; SKIP-CACHE-INV-NEXT:    s_endpgm
1530;
1531; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_atomicrmw:
1532; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1533; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1534; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1535; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1536; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1537; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1538; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1539; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1540;
1541; GFX90A-TGSPLIT-LABEL: global_system_monotonic_atomicrmw:
1542; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1543; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1544; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1545; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1546; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1547; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1548; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1549; GFX90A-TGSPLIT-NEXT:    s_endpgm
1550;
1551; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_atomicrmw:
1552; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1553; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1554; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1555; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1556; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1557; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1558; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
1559; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1560;
1561; GFX940-TGSPLIT-LABEL: global_system_monotonic_atomicrmw:
1562; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1563; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1564; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1565; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1566; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1567; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1568; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
1569; GFX940-TGSPLIT-NEXT:    s_endpgm
1570;
1571; GFX11-WGP-LABEL: global_system_monotonic_atomicrmw:
1572; GFX11-WGP:       ; %bb.0: ; %entry
1573; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1574; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1575; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1576; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1577; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1578; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1579; GFX11-WGP-NEXT:    s_endpgm
1580;
1581; GFX11-CU-LABEL: global_system_monotonic_atomicrmw:
1582; GFX11-CU:       ; %bb.0: ; %entry
1583; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1584; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1585; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1586; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1587; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1588; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1589; GFX11-CU-NEXT:    s_endpgm
1590;
1591; GFX12-WGP-LABEL: global_system_monotonic_atomicrmw:
1592; GFX12-WGP:       ; %bb.0: ; %entry
1593; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1594; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1595; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1596; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1597; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1598; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1599; GFX12-WGP-NEXT:    s_endpgm
1600;
1601; GFX12-CU-LABEL: global_system_monotonic_atomicrmw:
1602; GFX12-CU:       ; %bb.0: ; %entry
1603; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1604; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1605; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1606; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1607; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1608; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1609; GFX12-CU-NEXT:    s_endpgm
1610    ptr addrspace(1) %out, i32 %in) {
1611entry:
; The value returned by the exchange (%val) is never used.
1612  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in monotonic
1613  ret void
1614}
1615
; Acquire atomicrmw xchg on a global (addrspace(1)) pointer, with no syncscope
; operand on the instruction. Relative to the monotonic variant of this test,
; the expected code adds a wait and a cache invalidate between the swap atomic
; and s_endpgm. Per -mcpu, the check lines below expect
;   gfx600            s_waitcnt vmcnt(0), buffer_wbinvl1
;   gfx700            s_waitcnt vmcnt(0), buffer_wbinvl1_vol
;   gfx1010, gfx1100  s_waitcnt_vscnt null, 0x0, buffer_gl1_inv, buffer_gl0_inv
;                     (identical with and without +cumode)
;   gfx90a            s_waitcnt vmcnt(0), buffer_invl2, buffer_wbinvl1_vol
;                     (identical with and without +tgsplit)
;   gfx940            s_waitcnt vmcnt(0), buffer_inv sc0 sc1
;                     (identical with and without +tgsplit)
;   gfx1200           s_wait_storecnt 0x0, global_inv scope:SCOPE_SYS
;                     (identical with and without +cumode)
; The amdpal gfx700 run that passes -amdgcn-skip-cache-invalidations keeps the
; s_waitcnt vmcnt(0) but expects no invalidate instruction after it.
; The assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1616define amdgpu_kernel void @global_system_acquire_atomicrmw(
1617; GFX6-LABEL: global_system_acquire_atomicrmw:
1618; GFX6:       ; %bb.0: ; %entry
1619; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1620; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
1621; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1622; GFX6-NEXT:    s_mov_b32 s11, s5
1623; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1624; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1625; GFX6-NEXT:    s_mov_b32 s10, -1
1626; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1627; GFX6-NEXT:    s_mov_b32 s5, s11
1628; GFX6-NEXT:    s_mov_b32 s6, s10
1629; GFX6-NEXT:    s_mov_b32 s7, s9
1630; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1631; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
1632; GFX6-NEXT:    s_waitcnt vmcnt(0)
1633; GFX6-NEXT:    buffer_wbinvl1
1634; GFX6-NEXT:    s_endpgm
1635;
1636; GFX7-LABEL: global_system_acquire_atomicrmw:
1637; GFX7:       ; %bb.0: ; %entry
1638; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
1639; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
1640; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1641; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1642; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1643; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1644; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
1645; GFX7-NEXT:    s_waitcnt vmcnt(0)
1646; GFX7-NEXT:    buffer_wbinvl1_vol
1647; GFX7-NEXT:    s_endpgm
1648;
1649; GFX10-WGP-LABEL: global_system_acquire_atomicrmw:
1650; GFX10-WGP:       ; %bb.0: ; %entry
1651; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1652; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1653; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
1654; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1655; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1656; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
1657; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1658; GFX10-WGP-NEXT:    buffer_gl1_inv
1659; GFX10-WGP-NEXT:    buffer_gl0_inv
1660; GFX10-WGP-NEXT:    s_endpgm
1661;
1662; GFX10-CU-LABEL: global_system_acquire_atomicrmw:
1663; GFX10-CU:       ; %bb.0: ; %entry
1664; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1665; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1666; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
1667; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1668; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1669; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
1670; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1671; GFX10-CU-NEXT:    buffer_gl1_inv
1672; GFX10-CU-NEXT:    buffer_gl0_inv
1673; GFX10-CU-NEXT:    s_endpgm
1674;
1675; SKIP-CACHE-INV-LABEL: global_system_acquire_atomicrmw:
1676; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1677; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1678; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
1679; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1680; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1681; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1682; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1683; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1684; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1685; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1686; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1687; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1688; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1689; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
1690; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
1691; SKIP-CACHE-INV-NEXT:    s_endpgm
1692;
1693; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_atomicrmw:
1694; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1695; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1696; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1697; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1698; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1699; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1700; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1701; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
1702; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
1703; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
1704; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1705;
1706; GFX90A-TGSPLIT-LABEL: global_system_acquire_atomicrmw:
1707; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1708; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1709; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1710; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1711; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1712; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1713; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1714; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
1715; GFX90A-TGSPLIT-NEXT:    buffer_invl2
1716; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
1717; GFX90A-TGSPLIT-NEXT:    s_endpgm
1718;
1719; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_atomicrmw:
1720; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1721; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1722; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1723; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1724; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1725; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1726; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
1727; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
1728; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
1729; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1730;
1731; GFX940-TGSPLIT-LABEL: global_system_acquire_atomicrmw:
1732; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1733; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1734; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1735; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1736; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1737; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1738; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
1739; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
1740; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
1741; GFX940-TGSPLIT-NEXT:    s_endpgm
1742;
1743; GFX11-WGP-LABEL: global_system_acquire_atomicrmw:
1744; GFX11-WGP:       ; %bb.0: ; %entry
1745; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1746; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1747; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1748; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1749; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1750; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1751; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1752; GFX11-WGP-NEXT:    buffer_gl1_inv
1753; GFX11-WGP-NEXT:    buffer_gl0_inv
1754; GFX11-WGP-NEXT:    s_endpgm
1755;
1756; GFX11-CU-LABEL: global_system_acquire_atomicrmw:
1757; GFX11-CU:       ; %bb.0: ; %entry
1758; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1759; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1760; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1761; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1762; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1763; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1764; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1765; GFX11-CU-NEXT:    buffer_gl1_inv
1766; GFX11-CU-NEXT:    buffer_gl0_inv
1767; GFX11-CU-NEXT:    s_endpgm
1768;
1769; GFX12-WGP-LABEL: global_system_acquire_atomicrmw:
1770; GFX12-WGP:       ; %bb.0: ; %entry
1771; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1772; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1773; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1774; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1775; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1776; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1777; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
1778; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
1779; GFX12-WGP-NEXT:    s_endpgm
1780;
1781; GFX12-CU-LABEL: global_system_acquire_atomicrmw:
1782; GFX12-CU:       ; %bb.0: ; %entry
1783; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1784; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1785; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1786; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1787; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1788; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1789; GFX12-CU-NEXT:    s_wait_storecnt 0x0
1790; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
1791; GFX12-CU-NEXT:    s_endpgm
1792    ptr addrspace(1) %out, i32 %in) {
1793entry:
; The value returned by the exchange (%val) is never used.
1794  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire
1795  ret void
1796}
1797
; Release atomicrmw xchg on a global (addrspace(1)) pointer, with no syncscope
; operand on the instruction. Relative to the monotonic variant of this test,
; the expected code adds instructions immediately before the swap atomic and
; nothing between the atomic and s_endpgm. Per -mcpu, the check lines below
; expect, ahead of the atomic,
;   gfx600, gfx700    s_waitcnt vmcnt(0) lgkmcnt(0)
;                     (also for the -amdgcn-skip-cache-invalidations run)
;   gfx1010, gfx1100  s_waitcnt vmcnt(0) lgkmcnt(0), s_waitcnt_vscnt null, 0x0
;                     (identical with and without +cumode)
;   gfx90a            buffer_wbl2, s_waitcnt vmcnt(0) lgkmcnt(0)
;                     (identical with and without +tgsplit)
;   gfx940            buffer_wbl2 sc0 sc1, s_waitcnt vmcnt(0) lgkmcnt(0)
;                     (identical with and without +tgsplit)
;   gfx1200           global_wb scope:SCOPE_SYS, then s_wait_bvhcnt,
;                     s_wait_samplecnt, s_wait_storecnt and
;                     s_wait_loadcnt_dscnt, each with operand 0x0
;                     (identical with and without +cumode)
; The assertions are autogenerated (see the NOTE at the top of the file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
1798define amdgpu_kernel void @global_system_release_atomicrmw(
1799; GFX6-LABEL: global_system_release_atomicrmw:
1800; GFX6:       ; %bb.0: ; %entry
1801; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1802; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
1803; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1804; GFX6-NEXT:    s_mov_b32 s11, s5
1805; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1806; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1807; GFX6-NEXT:    s_mov_b32 s10, -1
1808; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1809; GFX6-NEXT:    s_mov_b32 s5, s11
1810; GFX6-NEXT:    s_mov_b32 s6, s10
1811; GFX6-NEXT:    s_mov_b32 s7, s9
1812; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1813; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1814; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
1815; GFX6-NEXT:    s_endpgm
1816;
1817; GFX7-LABEL: global_system_release_atomicrmw:
1818; GFX7:       ; %bb.0: ; %entry
1819; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
1820; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
1821; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
1822; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1823; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1824; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1825; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1826; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
1827; GFX7-NEXT:    s_endpgm
1828;
1829; GFX10-WGP-LABEL: global_system_release_atomicrmw:
1830; GFX10-WGP:       ; %bb.0: ; %entry
1831; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
1832; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1833; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
1834; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1835; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
1836; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1837; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1838; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
1839; GFX10-WGP-NEXT:    s_endpgm
1840;
1841; GFX10-CU-LABEL: global_system_release_atomicrmw:
1842; GFX10-CU:       ; %bb.0: ; %entry
1843; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
1844; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1845; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
1846; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
1847; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
1848; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1849; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1850; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
1851; GFX10-CU-NEXT:    s_endpgm
1852;
1853; SKIP-CACHE-INV-LABEL: global_system_release_atomicrmw:
1854; SKIP-CACHE-INV:       ; %bb.0: ; %entry
1855; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1856; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
1857; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
1858; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
1859; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
1860; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
1861; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
1862; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
1863; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
1864; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
1865; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
1866; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
1867; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1868; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
1869; SKIP-CACHE-INV-NEXT:    s_endpgm
1870;
1871; GFX90A-NOTTGSPLIT-LABEL: global_system_release_atomicrmw:
1872; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
1873; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1874; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1875; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1876; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1877; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1878; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
1879; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1880; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1881; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
1882;
1883; GFX90A-TGSPLIT-LABEL: global_system_release_atomicrmw:
1884; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
1885; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1886; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1887; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
1888; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1889; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
1890; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
1891; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1892; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
1893; GFX90A-TGSPLIT-NEXT:    s_endpgm
1894;
1895; GFX940-NOTTGSPLIT-LABEL: global_system_release_atomicrmw:
1896; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
1897; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1898; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1899; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1900; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1901; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1902; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
1903; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1904; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
1905; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
1906;
1907; GFX940-TGSPLIT-LABEL: global_system_release_atomicrmw:
1908; GFX940-TGSPLIT:       ; %bb.0: ; %entry
1909; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
1910; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
1911; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
1912; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
1913; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
1914; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
1915; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1916; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
1917; GFX940-TGSPLIT-NEXT:    s_endpgm
1918;
1919; GFX11-WGP-LABEL: global_system_release_atomicrmw:
1920; GFX11-WGP:       ; %bb.0: ; %entry
1921; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
1922; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1923; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1924; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
1925; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
1926; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1927; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
1928; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1929; GFX11-WGP-NEXT:    s_endpgm
1930;
1931; GFX11-CU-LABEL: global_system_release_atomicrmw:
1932; GFX11-CU:       ; %bb.0: ; %entry
1933; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
1934; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1935; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1936; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
1937; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
1938; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1939; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
1940; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
1941; GFX11-CU-NEXT:    s_endpgm
1942;
1943; GFX12-WGP-LABEL: global_system_release_atomicrmw:
1944; GFX12-WGP:       ; %bb.0: ; %entry
1945; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
1946; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1947; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
1948; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
1949; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
1950; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
1951; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
1952; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
1953; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
1954; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
1955; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1956; GFX12-WGP-NEXT:    s_endpgm
1957;
1958; GFX12-CU-LABEL: global_system_release_atomicrmw:
1959; GFX12-CU:       ; %bb.0: ; %entry
1960; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
1961; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
1962; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
1963; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
1964; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
1965; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
1966; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
1967; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
1968; GFX12-CU-NEXT:    s_wait_storecnt 0x0
1969; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
1970; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
1971; GFX12-CU-NEXT:    s_endpgm
1972    ptr addrspace(1) %out, i32 %in) {
1973entry:
; The value returned by the exchange (%val) is never used.
1974  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in release
1975  ret void
1976}
1977
1978define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
1979; GFX6-LABEL: global_system_acq_rel_atomicrmw:
1980; GFX6:       ; %bb.0: ; %entry
1981; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
1982; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
1983; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
1984; GFX6-NEXT:    s_mov_b32 s11, s5
1985; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
1986; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
1987; GFX6-NEXT:    s_mov_b32 s10, -1
1988; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
1989; GFX6-NEXT:    s_mov_b32 s5, s11
1990; GFX6-NEXT:    s_mov_b32 s6, s10
1991; GFX6-NEXT:    s_mov_b32 s7, s9
1992; GFX6-NEXT:    v_mov_b32_e32 v0, s8
1993; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1994; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
1995; GFX6-NEXT:    s_waitcnt vmcnt(0)
1996; GFX6-NEXT:    buffer_wbinvl1
1997; GFX6-NEXT:    s_endpgm
1998;
1999; GFX7-LABEL: global_system_acq_rel_atomicrmw:
2000; GFX7:       ; %bb.0: ; %entry
2001; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
2002; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
2003; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2004; GFX7-NEXT:    v_mov_b32_e32 v0, s6
2005; GFX7-NEXT:    v_mov_b32_e32 v1, s7
2006; GFX7-NEXT:    v_mov_b32_e32 v2, s4
2007; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2008; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
2009; GFX7-NEXT:    s_waitcnt vmcnt(0)
2010; GFX7-NEXT:    buffer_wbinvl1_vol
2011; GFX7-NEXT:    s_endpgm
2012;
2013; GFX10-WGP-LABEL: global_system_acq_rel_atomicrmw:
2014; GFX10-WGP:       ; %bb.0: ; %entry
2015; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2016; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2017; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2018; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2019; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2020; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2021; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2022; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
2023; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2024; GFX10-WGP-NEXT:    buffer_gl1_inv
2025; GFX10-WGP-NEXT:    buffer_gl0_inv
2026; GFX10-WGP-NEXT:    s_endpgm
2027;
2028; GFX10-CU-LABEL: global_system_acq_rel_atomicrmw:
2029; GFX10-CU:       ; %bb.0: ; %entry
2030; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2031; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2032; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2033; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2034; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2035; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2036; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2037; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
2038; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2039; GFX10-CU-NEXT:    buffer_gl1_inv
2040; GFX10-CU-NEXT:    buffer_gl0_inv
2041; GFX10-CU-NEXT:    s_endpgm
2042;
2043; SKIP-CACHE-INV-LABEL: global_system_acq_rel_atomicrmw:
2044; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2045; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2046; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2047; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2048; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2049; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2050; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2051; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2052; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2053; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2054; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2055; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2056; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2057; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2058; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
2059; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2060; SKIP-CACHE-INV-NEXT:    s_endpgm
2061;
2062; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_atomicrmw:
2063; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2064; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2065; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2066; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2067; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2068; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2069; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
2070; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2071; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
2072; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2073; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
2074; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2075; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2076;
2077; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_atomicrmw:
2078; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2079; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2080; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2081; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2082; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2083; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2084; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
2085; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2086; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
2087; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2088; GFX90A-TGSPLIT-NEXT:    buffer_invl2
2089; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2090; GFX90A-TGSPLIT-NEXT:    s_endpgm
2091;
2092; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_atomicrmw:
2093; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2094; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2095; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2096; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2097; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2098; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2099; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
2100; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2101; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
2102; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2103; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
2104; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2105;
2106; GFX940-TGSPLIT-LABEL: global_system_acq_rel_atomicrmw:
2107; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2108; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2109; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2110; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2111; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2112; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2113; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
2114; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2115; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
2116; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2117; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
2118; GFX940-TGSPLIT-NEXT:    s_endpgm
2119;
2120; GFX11-WGP-LABEL: global_system_acq_rel_atomicrmw:
2121; GFX11-WGP:       ; %bb.0: ; %entry
2122; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2123; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2124; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2125; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2126; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2127; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2128; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2129; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
2130; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2131; GFX11-WGP-NEXT:    buffer_gl1_inv
2132; GFX11-WGP-NEXT:    buffer_gl0_inv
2133; GFX11-WGP-NEXT:    s_endpgm
2134;
2135; GFX11-CU-LABEL: global_system_acq_rel_atomicrmw:
2136; GFX11-CU:       ; %bb.0: ; %entry
2137; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2138; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2139; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2140; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2141; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2142; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2143; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2144; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
2145; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2146; GFX11-CU-NEXT:    buffer_gl1_inv
2147; GFX11-CU-NEXT:    buffer_gl0_inv
2148; GFX11-CU-NEXT:    s_endpgm
2149;
2150; GFX12-WGP-LABEL: global_system_acq_rel_atomicrmw:
2151; GFX12-WGP:       ; %bb.0: ; %entry
2152; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2153; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2154; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2155; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2156; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2157; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
2158; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2159; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2160; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2161; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
2162; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
2163; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2164; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
2165; GFX12-WGP-NEXT:    s_endpgm
2166;
2167; GFX12-CU-LABEL: global_system_acq_rel_atomicrmw:
2168; GFX12-CU:       ; %bb.0: ; %entry
2169; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2170; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2171; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2172; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2173; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2174; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
2175; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2176; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
2177; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2178; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
2179; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
2180; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2181; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
2182; GFX12-CU-NEXT:    s_endpgm
2183    ptr addrspace(1) %out, i32 %in) {
2184entry:
2185  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel
2186  ret void
2187}
2188
; Sequentially consistent atomic exchange on global memory whose result is
; never read.
;
; The kernel takes a global (addrspace(1)) pointer %out and an i32 %in and
; issues a volatile `atomicrmw xchg` with seq_cst ordering. No syncscope is
; written on the instruction, so the default scope applies (the "system" in
; the function name; the gfx1200 expectations show scope:SCOPE_SYS).
;
; What the assertions below pin down, per RUN line in the file header:
;   * Every target expects the non-returning form of the swap (no glc, no
;     sc0, no th:TH_ATOMIC_RETURN on the atomic).
;   * gfx600 / gfx700: "s_waitcnt vmcnt(0) lgkmcnt(0)" before the swap, then
;     "s_waitcnt vmcnt(0)" and buffer_wbinvl1 / buffer_wbinvl1_vol after it.
;   * gfx1010 / gfx1100 (with and without +cumode): an additional
;     "s_waitcnt_vscnt null, 0x0" on both sides of the swap, followed by
;     buffer_gl1_inv and buffer_gl0_inv.
;   * gfx90a (with and without +tgsplit): buffer_wbl2 before the swap,
;     buffer_invl2 and buffer_wbinvl1_vol after it.
;   * gfx940 (with and without +tgsplit): "buffer_wbl2 sc0 sc1" before, sc1 on
;     the swap itself, "buffer_inv sc0 sc1" after.
;   * gfx1200 (with and without +cumode): "global_wb scope:SCOPE_SYS" and four
;     s_wait_* instructions before the swap, "s_wait_storecnt 0x0" and
;     "global_inv scope:SCOPE_SYS" after.
;   * The amdpal run with -amdgcn-skip-cache-invalidations: same waits as
;     gfx600 but no invalidate instruction before s_endpgm.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
2189define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
2190; GFX6-LABEL: global_system_seq_cst_atomicrmw:
2191; GFX6:       ; %bb.0: ; %entry
2192; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2193; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
2194; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2195; GFX6-NEXT:    s_mov_b32 s11, s5
2196; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2197; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
2198; GFX6-NEXT:    s_mov_b32 s10, -1
2199; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
2200; GFX6-NEXT:    s_mov_b32 s5, s11
2201; GFX6-NEXT:    s_mov_b32 s6, s10
2202; GFX6-NEXT:    s_mov_b32 s7, s9
2203; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2204; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2205; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
2206; GFX6-NEXT:    s_waitcnt vmcnt(0)
2207; GFX6-NEXT:    buffer_wbinvl1
2208; GFX6-NEXT:    s_endpgm
2209;
2210; GFX7-LABEL: global_system_seq_cst_atomicrmw:
2211; GFX7:       ; %bb.0: ; %entry
2212; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
2213; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
2214; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2215; GFX7-NEXT:    v_mov_b32_e32 v0, s6
2216; GFX7-NEXT:    v_mov_b32_e32 v1, s7
2217; GFX7-NEXT:    v_mov_b32_e32 v2, s4
2218; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2219; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
2220; GFX7-NEXT:    s_waitcnt vmcnt(0)
2221; GFX7-NEXT:    buffer_wbinvl1_vol
2222; GFX7-NEXT:    s_endpgm
2223;
2224; GFX10-WGP-LABEL: global_system_seq_cst_atomicrmw:
2225; GFX10-WGP:       ; %bb.0: ; %entry
2226; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2227; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2228; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2229; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2230; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2231; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2232; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2233; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
2234; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2235; GFX10-WGP-NEXT:    buffer_gl1_inv
2236; GFX10-WGP-NEXT:    buffer_gl0_inv
2237; GFX10-WGP-NEXT:    s_endpgm
2238;
2239; GFX10-CU-LABEL: global_system_seq_cst_atomicrmw:
2240; GFX10-CU:       ; %bb.0: ; %entry
2241; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2242; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2243; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2244; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2245; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2246; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2247; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2248; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
2249; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2250; GFX10-CU-NEXT:    buffer_gl1_inv
2251; GFX10-CU-NEXT:    buffer_gl0_inv
2252; GFX10-CU-NEXT:    s_endpgm
2253;
2254; SKIP-CACHE-INV-LABEL: global_system_seq_cst_atomicrmw:
2255; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2256; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2257; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2258; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2259; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2260; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2261; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2262; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2263; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2264; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2265; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2266; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2267; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2268; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2269; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
2270; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2271; SKIP-CACHE-INV-NEXT:    s_endpgm
2272;
2273; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_atomicrmw:
2274; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2275; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2276; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2277; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2278; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2279; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2280; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
2281; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2282; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
2283; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2284; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
2285; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2286; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2287;
2288; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_atomicrmw:
2289; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2290; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2291; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2292; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2293; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2294; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2295; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
2296; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2297; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
2298; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2299; GFX90A-TGSPLIT-NEXT:    buffer_invl2
2300; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2301; GFX90A-TGSPLIT-NEXT:    s_endpgm
2302;
2303; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_atomicrmw:
2304; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2305; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2306; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2307; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2308; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2309; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2310; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
2311; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2312; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
2313; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2314; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
2315; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2316;
2317; GFX940-TGSPLIT-LABEL: global_system_seq_cst_atomicrmw:
2318; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2319; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2320; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2321; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2322; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2323; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2324; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
2325; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2326; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
2327; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2328; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
2329; GFX940-TGSPLIT-NEXT:    s_endpgm
2330;
2331; GFX11-WGP-LABEL: global_system_seq_cst_atomicrmw:
2332; GFX11-WGP:       ; %bb.0: ; %entry
2333; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2334; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2335; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2336; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2337; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2338; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2339; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2340; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
2341; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2342; GFX11-WGP-NEXT:    buffer_gl1_inv
2343; GFX11-WGP-NEXT:    buffer_gl0_inv
2344; GFX11-WGP-NEXT:    s_endpgm
2345;
2346; GFX11-CU-LABEL: global_system_seq_cst_atomicrmw:
2347; GFX11-CU:       ; %bb.0: ; %entry
2348; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2349; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2350; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2351; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2352; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2353; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2354; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2355; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
2356; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2357; GFX11-CU-NEXT:    buffer_gl1_inv
2358; GFX11-CU-NEXT:    buffer_gl0_inv
2359; GFX11-CU-NEXT:    s_endpgm
2360;
2361; GFX12-WGP-LABEL: global_system_seq_cst_atomicrmw:
2362; GFX12-WGP:       ; %bb.0: ; %entry
2363; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2364; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2365; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2366; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2367; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2368; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
2369; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2370; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2371; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2372; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
2373; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
2374; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2375; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
2376; GFX12-WGP-NEXT:    s_endpgm
2377;
2378; GFX12-CU-LABEL: global_system_seq_cst_atomicrmw:
2379; GFX12-CU:       ; %bb.0: ; %entry
2380; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2381; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2382; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2383; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2384; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2385; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
2386; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2387; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
2388; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2389; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
2390; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
2391; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2392; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
2393; GFX12-CU-NEXT:    s_endpgm
2394    ptr addrspace(1) %out, i32 %in) {
2395entry:
  ; The value exchanged out (%val) has no users in this kernel, which is why
  ; the expectations above use the non-returning swap encoding.
2396  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
2397  ret void
2398}
2399
; Acquire atomic exchange on global memory whose result IS consumed.
;
; The kernel takes a global (addrspace(1)) pointer %out and an i32 %in,
; issues a volatile `atomicrmw xchg` with acquire ordering, and stores the
; value returned by the exchange back through %out. No syncscope is written
; on the instruction, so the default scope applies (the "system" in the
; function name; the gfx1200 expectations show scope:SCOPE_SYS).
;
; What the assertions below pin down, per RUN line in the file header:
;   * Every target expects the returning form of the swap: glc on gfx600,
;     gfx700, gfx1010, gfx90a and gfx1100; "sc0 sc1" on gfx940; and
;     "th:TH_ATOMIC_RETURN scope:SCOPE_SYS" on gfx1200.
;   * No wait or write-back instruction is expected between materialising the
;     operand (v_mov_b32) and the swap; the only earlier wait is the one
;     covering the kernel-argument loads.
;   * After the swap each target expects a wait ("s_waitcnt vmcnt(0)", or
;     "s_wait_loadcnt 0x0" on gfx1200) followed by its invalidate sequence:
;     buffer_wbinvl1 (gfx600), buffer_wbinvl1_vol (gfx700), buffer_gl1_inv +
;     buffer_gl0_inv (gfx1010 / gfx1100), buffer_invl2 + buffer_wbinvl1_vol
;     (gfx90a), "buffer_inv sc0 sc1" (gfx940), "global_inv scope:SCOPE_SYS"
;     (gfx1200). Only then is the dependent store expected.
;   * The amdpal run with -amdgcn-skip-cache-invalidations keeps the wait
;     but expects no invalidate instruction before the store.
;
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
2400define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
2401; GFX6-LABEL: global_system_acquire_ret_atomicrmw:
2402; GFX6:       ; %bb.0: ; %entry
2403; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2404; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
2405; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2406; GFX6-NEXT:    s_mov_b32 s11, s5
2407; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2408; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
2409; GFX6-NEXT:    s_mov_b32 s10, -1
2410; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
2411; GFX6-NEXT:    s_mov_b32 s5, s11
2412; GFX6-NEXT:    s_mov_b32 s6, s10
2413; GFX6-NEXT:    s_mov_b32 s7, s9
2414; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2415; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
2416; GFX6-NEXT:    s_waitcnt vmcnt(0)
2417; GFX6-NEXT:    buffer_wbinvl1
2418; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2419; GFX6-NEXT:    s_endpgm
2420;
2421; GFX7-LABEL: global_system_acquire_ret_atomicrmw:
2422; GFX7:       ; %bb.0: ; %entry
2423; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2424; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
2425; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2426; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2427; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2428; GFX7-NEXT:    v_mov_b32_e32 v2, s6
2429; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
2430; GFX7-NEXT:    s_waitcnt vmcnt(0)
2431; GFX7-NEXT:    buffer_wbinvl1_vol
2432; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2433; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2434; GFX7-NEXT:    flat_store_dword v[0:1], v2
2435; GFX7-NEXT:    s_endpgm
2436;
2437; GFX10-WGP-LABEL: global_system_acquire_ret_atomicrmw:
2438; GFX10-WGP:       ; %bb.0: ; %entry
2439; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2440; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2441; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2442; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2443; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2444; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2445; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
2446; GFX10-WGP-NEXT:    buffer_gl1_inv
2447; GFX10-WGP-NEXT:    buffer_gl0_inv
2448; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
2449; GFX10-WGP-NEXT:    s_endpgm
2450;
2451; GFX10-CU-LABEL: global_system_acquire_ret_atomicrmw:
2452; GFX10-CU:       ; %bb.0: ; %entry
2453; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2454; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2455; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2456; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2457; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2458; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2459; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
2460; GFX10-CU-NEXT:    buffer_gl1_inv
2461; GFX10-CU-NEXT:    buffer_gl0_inv
2462; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
2463; GFX10-CU-NEXT:    s_endpgm
2464;
2465; SKIP-CACHE-INV-LABEL: global_system_acquire_ret_atomicrmw:
2466; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2467; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2468; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2469; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2470; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2471; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2472; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2473; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2474; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2475; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2476; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2477; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2478; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2479; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
2480; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2481; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2482; SKIP-CACHE-INV-NEXT:    s_endpgm
2483;
2484; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_ret_atomicrmw:
2485; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2486; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2487; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2488; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2489; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2490; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2491; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2492; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2493; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
2494; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2495; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2496; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2497;
2498; GFX90A-TGSPLIT-LABEL: global_system_acquire_ret_atomicrmw:
2499; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2500; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2501; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2502; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2503; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2504; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2505; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2506; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2507; GFX90A-TGSPLIT-NEXT:    buffer_invl2
2508; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2509; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2510; GFX90A-TGSPLIT-NEXT:    s_endpgm
2511;
2512; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_ret_atomicrmw:
2513; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2514; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2515; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2516; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2517; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2518; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2519; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
2520; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2521; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
2522; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2523; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2524;
2525; GFX940-TGSPLIT-LABEL: global_system_acquire_ret_atomicrmw:
2526; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2527; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2528; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2529; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2530; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2531; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2532; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
2533; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2534; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
2535; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2536; GFX940-TGSPLIT-NEXT:    s_endpgm
2537;
2538; GFX11-WGP-LABEL: global_system_acquire_ret_atomicrmw:
2539; GFX11-WGP:       ; %bb.0: ; %entry
2540; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2541; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2542; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2543; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2544; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2545; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2546; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
2547; GFX11-WGP-NEXT:    buffer_gl1_inv
2548; GFX11-WGP-NEXT:    buffer_gl0_inv
2549; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2550; GFX11-WGP-NEXT:    s_endpgm
2551;
2552; GFX11-CU-LABEL: global_system_acquire_ret_atomicrmw:
2553; GFX11-CU:       ; %bb.0: ; %entry
2554; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2555; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2556; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2557; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2558; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2559; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2560; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
2561; GFX11-CU-NEXT:    buffer_gl1_inv
2562; GFX11-CU-NEXT:    buffer_gl0_inv
2563; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2564; GFX11-CU-NEXT:    s_endpgm
2565;
2566; GFX12-WGP-LABEL: global_system_acquire_ret_atomicrmw:
2567; GFX12-WGP:       ; %bb.0: ; %entry
2568; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2569; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2570; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2571; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2572; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2573; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
2574; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
2575; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
2576; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2577; GFX12-WGP-NEXT:    s_endpgm
2578;
2579; GFX12-CU-LABEL: global_system_acquire_ret_atomicrmw:
2580; GFX12-CU:       ; %bb.0: ; %entry
2581; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2582; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2583; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2584; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2585; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2586; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
2587; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
2588; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
2589; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2590; GFX12-CU-NEXT:    s_endpgm
2591    ptr addrspace(1) %out, i32 %in) {
2592entry:
  ; %val is the value the exchange returns.
2593  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acquire
  ; Storing %val through the same pointer gives the atomic result a user, so
  ; the expectations above use the returning swap encoding and place this
  ; store after the post-atomic wait/invalidate sequence.
2594  store i32 %val, ptr addrspace(1) %out, align 4
2595  ret void
2596}
2598define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
2599; GFX6-LABEL: global_system_acq_rel_ret_atomicrmw:
2600; GFX6:       ; %bb.0: ; %entry
2601; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2602; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
2603; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2604; GFX6-NEXT:    s_mov_b32 s11, s5
2605; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2606; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
2607; GFX6-NEXT:    s_mov_b32 s10, -1
2608; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
2609; GFX6-NEXT:    s_mov_b32 s5, s11
2610; GFX6-NEXT:    s_mov_b32 s6, s10
2611; GFX6-NEXT:    s_mov_b32 s7, s9
2612; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2613; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2614; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
2615; GFX6-NEXT:    s_waitcnt vmcnt(0)
2616; GFX6-NEXT:    buffer_wbinvl1
2617; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2618; GFX6-NEXT:    s_endpgm
2619;
2620; GFX7-LABEL: global_system_acq_rel_ret_atomicrmw:
2621; GFX7:       ; %bb.0: ; %entry
2622; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2623; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
2624; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2625; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2626; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2627; GFX7-NEXT:    v_mov_b32_e32 v2, s6
2628; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2629; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
2630; GFX7-NEXT:    s_waitcnt vmcnt(0)
2631; GFX7-NEXT:    buffer_wbinvl1_vol
2632; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2633; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2634; GFX7-NEXT:    flat_store_dword v[0:1], v2
2635; GFX7-NEXT:    s_endpgm
2636;
2637; GFX10-WGP-LABEL: global_system_acq_rel_ret_atomicrmw:
2638; GFX10-WGP:       ; %bb.0: ; %entry
2639; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2640; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2641; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2642; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2643; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2644; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2645; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2646; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2647; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
2648; GFX10-WGP-NEXT:    buffer_gl1_inv
2649; GFX10-WGP-NEXT:    buffer_gl0_inv
2650; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
2651; GFX10-WGP-NEXT:    s_endpgm
2652;
2653; GFX10-CU-LABEL: global_system_acq_rel_ret_atomicrmw:
2654; GFX10-CU:       ; %bb.0: ; %entry
2655; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2656; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2657; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2658; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2659; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2660; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2661; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2662; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2663; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
2664; GFX10-CU-NEXT:    buffer_gl1_inv
2665; GFX10-CU-NEXT:    buffer_gl0_inv
2666; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
2667; GFX10-CU-NEXT:    s_endpgm
2668;
2669; SKIP-CACHE-INV-LABEL: global_system_acq_rel_ret_atomicrmw:
2670; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2671; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2672; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2673; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2674; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2675; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2676; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2677; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2678; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2679; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2680; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2681; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2682; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2683; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2684; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
2685; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2686; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2687; SKIP-CACHE-INV-NEXT:    s_endpgm
2688;
2689; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw:
2690; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2691; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2692; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2693; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2694; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2695; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2696; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
2697; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2698; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2699; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2700; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
2701; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2702; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2703; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2704;
2705; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw:
2706; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2707; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2708; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2709; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2710; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2711; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2712; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
2713; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2714; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2715; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2716; GFX90A-TGSPLIT-NEXT:    buffer_invl2
2717; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2718; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2719; GFX90A-TGSPLIT-NEXT:    s_endpgm
2720;
2721; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw:
2722; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2723; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2724; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2725; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2726; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2727; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2728; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
2729; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2730; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
2731; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2732; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
2733; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2734; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2735;
2736; GFX940-TGSPLIT-LABEL: global_system_acq_rel_ret_atomicrmw:
2737; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2738; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2739; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2740; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2741; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2742; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2743; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
2744; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2745; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
2746; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2747; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
2748; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2749; GFX940-TGSPLIT-NEXT:    s_endpgm
2750;
2751; GFX11-WGP-LABEL: global_system_acq_rel_ret_atomicrmw:
2752; GFX11-WGP:       ; %bb.0: ; %entry
2753; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2754; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2755; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2756; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2757; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2758; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2759; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2760; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2761; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
2762; GFX11-WGP-NEXT:    buffer_gl1_inv
2763; GFX11-WGP-NEXT:    buffer_gl0_inv
2764; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2765; GFX11-WGP-NEXT:    s_endpgm
2766;
2767; GFX11-CU-LABEL: global_system_acq_rel_ret_atomicrmw:
2768; GFX11-CU:       ; %bb.0: ; %entry
2769; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
2770; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2771; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2772; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
2773; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
2774; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2775; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2776; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2777; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
2778; GFX11-CU-NEXT:    buffer_gl1_inv
2779; GFX11-CU-NEXT:    buffer_gl0_inv
2780; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2781; GFX11-CU-NEXT:    s_endpgm
2782;
2783; GFX12-WGP-LABEL: global_system_acq_rel_ret_atomicrmw:
2784; GFX12-WGP:       ; %bb.0: ; %entry
2785; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
2786; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2787; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2788; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
2789; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
2790; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
2791; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2792; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2793; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
2794; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
2795; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
2796; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
2797; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
2798; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
2799; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
2800; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2801; GFX12-WGP-NEXT:    s_endpgm
2802;
2803; GFX12-CU-LABEL: global_system_acq_rel_ret_atomicrmw:
2804; GFX12-CU:       ; %bb.0: ; %entry
2805; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
2806; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2807; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
2808; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
2809; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
2810; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
2811; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2812; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
2813; GFX12-CU-NEXT:    s_wait_storecnt 0x0
2814; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
2815; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
2816; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
2817; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
2818; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
2819; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
2820; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
2821; GFX12-CU-NEXT:    s_endpgm
2822    ptr addrspace(1) %out, i32 %in) {
2823entry:
2824  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in acq_rel
2825  store i32 %val, ptr addrspace(1) %out, align 4
2826  ret void
2827}
2828
; Test kernel: performs a volatile seq_cst `atomicrmw xchg` of %in on the
; addrspace(1) pointer %out (no syncscope is given on the instruction) and then
; stores the value returned by the exchange back to %out. The assertion lines
; between the `define` line and the parameter list are the expected llc output
; for each RUN configuration; they are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2829define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
2830; GFX6-LABEL: global_system_seq_cst_ret_atomicrmw:
2831; GFX6:       ; %bb.0: ; %entry
2832; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2833; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
2834; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
2835; GFX6-NEXT:    s_mov_b32 s11, s5
2836; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
2837; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
2838; GFX6-NEXT:    s_mov_b32 s10, -1
2839; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
2840; GFX6-NEXT:    s_mov_b32 s5, s11
2841; GFX6-NEXT:    s_mov_b32 s6, s10
2842; GFX6-NEXT:    s_mov_b32 s7, s9
2843; GFX6-NEXT:    v_mov_b32_e32 v0, s8
2844; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2845; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
2846; GFX6-NEXT:    s_waitcnt vmcnt(0)
2847; GFX6-NEXT:    buffer_wbinvl1
2848; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2849; GFX6-NEXT:    s_endpgm
2850;
2851; GFX7-LABEL: global_system_seq_cst_ret_atomicrmw:
2852; GFX7:       ; %bb.0: ; %entry
2853; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2854; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
2855; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
2856; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2857; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2858; GFX7-NEXT:    v_mov_b32_e32 v2, s6
2859; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2860; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
2861; GFX7-NEXT:    s_waitcnt vmcnt(0)
2862; GFX7-NEXT:    buffer_wbinvl1_vol
2863; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2864; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2865; GFX7-NEXT:    flat_store_dword v[0:1], v2
2866; GFX7-NEXT:    s_endpgm
2867;
2868; GFX10-WGP-LABEL: global_system_seq_cst_ret_atomicrmw:
2869; GFX10-WGP:       ; %bb.0: ; %entry
2870; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
2871; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2872; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
2873; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2874; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
2875; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2876; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2877; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2878; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
2879; GFX10-WGP-NEXT:    buffer_gl1_inv
2880; GFX10-WGP-NEXT:    buffer_gl0_inv
2881; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
2882; GFX10-WGP-NEXT:    s_endpgm
2883;
2884; GFX10-CU-LABEL: global_system_seq_cst_ret_atomicrmw:
2885; GFX10-CU:       ; %bb.0: ; %entry
2886; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
2887; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2888; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
2889; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
2890; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
2891; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2892; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
2893; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2894; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
2895; GFX10-CU-NEXT:    buffer_gl1_inv
2896; GFX10-CU-NEXT:    buffer_gl0_inv
2897; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
2898; GFX10-CU-NEXT:    s_endpgm
2899;
2900; SKIP-CACHE-INV-LABEL: global_system_seq_cst_ret_atomicrmw:
2901; SKIP-CACHE-INV:       ; %bb.0: ; %entry
2902; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2903; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
2904; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
2905; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
2906; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
2907; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
2908; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
2909; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
2910; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
2911; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
2912; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
2913; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
2914; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2915; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
2916; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
2917; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
2918; SKIP-CACHE-INV-NEXT:    s_endpgm
2919;
2920; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw:
2921; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
2922; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2923; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2924; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2925; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2926; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2927; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
2928; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2929; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2930; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2931; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
2932; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
2933; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2934; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
2935;
2936; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw:
2937; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
2938; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2939; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
2940; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
2941; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2942; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
2943; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
2944; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2945; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
2946; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2947; GFX90A-TGSPLIT-NEXT:    buffer_invl2
2948; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
2949; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
2950; GFX90A-TGSPLIT-NEXT:    s_endpgm
2951;
2952; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw:
2953; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
2954; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2955; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2956; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2957; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2958; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2959; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
2960; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2961; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
2962; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2963; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
2964; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2965; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
2966;
2967; GFX940-TGSPLIT-LABEL: global_system_seq_cst_ret_atomicrmw:
2968; GFX940-TGSPLIT:       ; %bb.0: ; %entry
2969; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
2970; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
2971; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
2972; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
2973; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
2974; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
2975; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2976; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
2977; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
2978; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
2979; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
2980; GFX940-TGSPLIT-NEXT:    s_endpgm
2981;
2982; GFX11-WGP-LABEL: global_system_seq_cst_ret_atomicrmw:
2983; GFX11-WGP:       ; %bb.0: ; %entry
2984; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
2985; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
2986; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
2987; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
2988; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
2989; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2990; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
2991; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
2992; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
2993; GFX11-WGP-NEXT:    buffer_gl1_inv
2994; GFX11-WGP-NEXT:    buffer_gl0_inv
2995; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
2996; GFX11-WGP-NEXT:    s_endpgm
2997;
2998; GFX11-CU-LABEL: global_system_seq_cst_ret_atomicrmw:
2999; GFX11-CU:       ; %bb.0: ; %entry
3000; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3001; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3002; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
3003; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3004; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
3005; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3006; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3007; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
3008; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
3009; GFX11-CU-NEXT:    buffer_gl1_inv
3010; GFX11-CU-NEXT:    buffer_gl0_inv
3011; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
3012; GFX11-CU-NEXT:    s_endpgm
3013;
3014; GFX12-WGP-LABEL: global_system_seq_cst_ret_atomicrmw:
3015; GFX12-WGP:       ; %bb.0: ; %entry
3016; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3017; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3018; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
3019; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
3020; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
3021; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
3022; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
3023; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
3024; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
3025; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
3026; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
3027; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
3028; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
3029; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
3030; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
3031; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
3032; GFX12-WGP-NEXT:    s_endpgm
3033;
3034; GFX12-CU-LABEL: global_system_seq_cst_ret_atomicrmw:
3035; GFX12-CU:       ; %bb.0: ; %entry
3036; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
3037; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3038; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
3039; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
3040; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
3041; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
3042; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
3043; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
3044; GFX12-CU-NEXT:    s_wait_storecnt 0x0
3045; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
3046; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
3047; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
3048; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
3049; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
3050; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
3051; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
3052; GFX12-CU-NEXT:    s_endpgm
3053    ptr addrspace(1) %out, i32 %in) {
3054entry:
; Exchange %in into %out with seq_cst ordering; %val receives the value the
; exchange returns.
3055  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst
; Store %val so the result of the atomic has a use (presumably this is what
; keeps the returning form of the swap in the expected output above).
3056  store i32 %val, ptr addrspace(1) %out, align 4
3057  ret void
3058}
3059
; Test kernel: performs a volatile `cmpxchg` with monotonic success and
; monotonic failure orderings (no syncscope is given on the instruction) on the
; addrspace(1) address %out + 4 x i32, comparing against %old and writing %in.
; The result of the cmpxchg is not used. The assertion lines between the
; `define` line and the parameter list are the expected llc output for each RUN
; configuration; they are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3060define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg(
3061; GFX6-LABEL: global_system_monotonic_monotonic_cmpxchg:
3062; GFX6:       ; %bb.0: ; %entry
3063; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3064; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3065; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3066; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3067; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3068; GFX6-NEXT:    s_mov_b32 s12, s5
3069; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3070; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3071; GFX6-NEXT:    s_mov_b32 s11, -1
3072; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
3073; GFX6-NEXT:    s_mov_b32 s5, s12
3074; GFX6-NEXT:    s_mov_b32 s6, s11
3075; GFX6-NEXT:    s_mov_b32 s7, s10
3076; GFX6-NEXT:    v_mov_b32_e32 v0, s9
3077; GFX6-NEXT:    v_mov_b32_e32 v2, s8
3078; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3079; GFX6-NEXT:    v_mov_b32_e32 v1, v2
3080; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3081; GFX6-NEXT:    s_endpgm
3082;
3083; GFX7-LABEL: global_system_monotonic_monotonic_cmpxchg:
3084; GFX7:       ; %bb.0: ; %entry
3085; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
3086; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3087; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
3088; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
3089; GFX7-NEXT:    s_mov_b64 s[10:11], 16
3090; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
3091; GFX7-NEXT:    s_mov_b32 s4, s8
3092; GFX7-NEXT:    s_mov_b32 s5, s9
3093; GFX7-NEXT:    s_mov_b32 s9, s10
3094; GFX7-NEXT:    s_mov_b32 s8, s11
3095; GFX7-NEXT:    s_add_u32 s4, s4, s9
3096; GFX7-NEXT:    s_addc_u32 s8, s5, s8
3097; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
3098; GFX7-NEXT:    s_mov_b32 s5, s8
3099; GFX7-NEXT:    v_mov_b32_e32 v2, s7
3100; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3101; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3102; GFX7-NEXT:    v_mov_b32_e32 v3, v0
3103; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3104; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3105; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
3106; GFX7-NEXT:    s_endpgm
3107;
3108; GFX10-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg:
3109; GFX10-WGP:       ; %bb.0: ; %entry
3110; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
3111; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3112; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
3113; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
3114; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3115; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
3116; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
3117; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3118; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
3119; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3120; GFX10-WGP-NEXT:    s_endpgm
3121;
3122; GFX10-CU-LABEL: global_system_monotonic_monotonic_cmpxchg:
3123; GFX10-CU:       ; %bb.0: ; %entry
3124; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
3125; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3126; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
3127; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
3128; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
3129; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
3130; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
3131; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3132; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
3133; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3134; GFX10-CU-NEXT:    s_endpgm
3135;
3136; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_cmpxchg:
3137; SKIP-CACHE-INV:       ; %bb.0: ; %entry
3138; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
3139; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
3140; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
3141; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
3142; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
3143; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
3144; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
3145; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
3146; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
3147; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
3148; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
3149; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
3150; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
3151; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
3152; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
3153; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3154; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
3155; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3156; SKIP-CACHE-INV-NEXT:    s_endpgm
3157;
3158; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg:
3159; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
3160; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3161; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3162; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3163; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3164; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3165; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3166; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3167; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3168; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3169; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3170; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
3171;
3172; GFX90A-TGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg:
3173; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
3174; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3175; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3176; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3177; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3178; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3179; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3180; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3181; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3182; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3183; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3184; GFX90A-TGSPLIT-NEXT:    s_endpgm
3185;
3186; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg:
3187; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
3188; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3189; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3190; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3191; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3192; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3193; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3194; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3195; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3196; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3197; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
3198; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
3199;
3200; GFX940-TGSPLIT-LABEL: global_system_monotonic_monotonic_cmpxchg:
3201; GFX940-TGSPLIT:       ; %bb.0: ; %entry
3202; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3203; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3204; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3205; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3206; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3207; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3208; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3209; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3210; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3211; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
3212; GFX940-TGSPLIT-NEXT:    s_endpgm
3213;
3214; GFX11-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg:
3215; GFX11-WGP:       ; %bb.0: ; %entry
3216; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
3217; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3218; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3219; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3220; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3221; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
3222; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
3223; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3224; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
3225; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3226; GFX11-WGP-NEXT:    s_endpgm
3227;
3228; GFX11-CU-LABEL: global_system_monotonic_monotonic_cmpxchg:
3229; GFX11-CU:       ; %bb.0: ; %entry
3230; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3231; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3232; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3233; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3234; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3235; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
3236; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
3237; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3238; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
3239; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3240; GFX11-CU-NEXT:    s_endpgm
3241;
3242; GFX12-WGP-LABEL: global_system_monotonic_monotonic_cmpxchg:
3243; GFX12-WGP:       ; %bb.0: ; %entry
3244; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3245; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3246; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3247; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3248; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
3249; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
3250; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
3251; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3252; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
3253; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
3254; GFX12-WGP-NEXT:    s_endpgm
3255;
3256; GFX12-CU-LABEL: global_system_monotonic_monotonic_cmpxchg:
3257; GFX12-CU:       ; %bb.0: ; %entry
3258; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
3259; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3260; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3261; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3262; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
3263; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
3264; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
3265; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3266; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
3267; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
3268; GFX12-CU-NEXT:    s_endpgm
3269    ptr addrspace(1) %out, i32 %in, i32 %old) {
3270entry:
; Address of the fifth i32 after %out, i.e. 16 bytes past it; this is the
; 16-byte offset that appears in the expected instructions above.
3271  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare against %old, write %in on match; %val is intentionally left unused.
3272  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic monotonic
3273  ret void
3274}
3275
3276define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
3277; GFX6-LABEL: global_system_acquire_monotonic_cmpxchg:
3278; GFX6:       ; %bb.0: ; %entry
3279; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3280; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3281; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3282; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3283; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3284; GFX6-NEXT:    s_mov_b32 s12, s5
3285; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3286; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3287; GFX6-NEXT:    s_mov_b32 s11, -1
3288; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
3289; GFX6-NEXT:    s_mov_b32 s5, s12
3290; GFX6-NEXT:    s_mov_b32 s6, s11
3291; GFX6-NEXT:    s_mov_b32 s7, s10
3292; GFX6-NEXT:    v_mov_b32_e32 v0, s9
3293; GFX6-NEXT:    v_mov_b32_e32 v2, s8
3294; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3295; GFX6-NEXT:    v_mov_b32_e32 v1, v2
3296; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3297; GFX6-NEXT:    s_waitcnt vmcnt(0)
3298; GFX6-NEXT:    buffer_wbinvl1
3299; GFX6-NEXT:    s_endpgm
3300;
3301; GFX7-LABEL: global_system_acquire_monotonic_cmpxchg:
3302; GFX7:       ; %bb.0: ; %entry
3303; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
3304; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3305; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
3306; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
3307; GFX7-NEXT:    s_mov_b64 s[10:11], 16
3308; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
3309; GFX7-NEXT:    s_mov_b32 s4, s8
3310; GFX7-NEXT:    s_mov_b32 s5, s9
3311; GFX7-NEXT:    s_mov_b32 s9, s10
3312; GFX7-NEXT:    s_mov_b32 s8, s11
3313; GFX7-NEXT:    s_add_u32 s4, s4, s9
3314; GFX7-NEXT:    s_addc_u32 s8, s5, s8
3315; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
3316; GFX7-NEXT:    s_mov_b32 s5, s8
3317; GFX7-NEXT:    v_mov_b32_e32 v2, s7
3318; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3319; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3320; GFX7-NEXT:    v_mov_b32_e32 v3, v0
3321; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3322; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3323; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
3324; GFX7-NEXT:    s_waitcnt vmcnt(0)
3325; GFX7-NEXT:    buffer_wbinvl1_vol
3326; GFX7-NEXT:    s_endpgm
3327;
3328; GFX10-WGP-LABEL: global_system_acquire_monotonic_cmpxchg:
3329; GFX10-WGP:       ; %bb.0: ; %entry
3330; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
3331; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3332; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
3333; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
3334; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3335; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
3336; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
3337; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3338; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
3339; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3340; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3341; GFX10-WGP-NEXT:    buffer_gl1_inv
3342; GFX10-WGP-NEXT:    buffer_gl0_inv
3343; GFX10-WGP-NEXT:    s_endpgm
3344;
3345; GFX10-CU-LABEL: global_system_acquire_monotonic_cmpxchg:
3346; GFX10-CU:       ; %bb.0: ; %entry
3347; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
3348; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3349; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
3350; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
3351; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
3352; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
3353; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
3354; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3355; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
3356; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3357; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3358; GFX10-CU-NEXT:    buffer_gl1_inv
3359; GFX10-CU-NEXT:    buffer_gl0_inv
3360; GFX10-CU-NEXT:    s_endpgm
3361;
3362; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_cmpxchg:
3363; SKIP-CACHE-INV:       ; %bb.0: ; %entry
3364; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
3365; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
3366; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
3367; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
3368; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
3369; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
3370; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
3371; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
3372; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
3373; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
3374; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
3375; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
3376; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
3377; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
3378; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
3379; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3380; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
3381; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3382; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
3383; SKIP-CACHE-INV-NEXT:    s_endpgm
3384;
3385; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg:
3386; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
3387; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3388; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3389; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3390; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3391; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3392; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3393; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3394; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3395; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3396; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3397; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3398; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
3399; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
3400; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
3401;
3402; GFX90A-TGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg:
3403; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
3404; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3405; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3406; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3407; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3408; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3409; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3410; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3411; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3412; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3413; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3414; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3415; GFX90A-TGSPLIT-NEXT:    buffer_invl2
3416; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
3417; GFX90A-TGSPLIT-NEXT:    s_endpgm
3418;
3419; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg:
3420; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
3421; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3422; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3423; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3424; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3425; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3426; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3427; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3428; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3429; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3430; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
3431; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3432; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
3433; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
3434;
3435; GFX940-TGSPLIT-LABEL: global_system_acquire_monotonic_cmpxchg:
3436; GFX940-TGSPLIT:       ; %bb.0: ; %entry
3437; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3438; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3439; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3440; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3441; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3442; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3443; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3444; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3445; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3446; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
3447; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3448; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
3449; GFX940-TGSPLIT-NEXT:    s_endpgm
3450;
3451; GFX11-WGP-LABEL: global_system_acquire_monotonic_cmpxchg:
3452; GFX11-WGP:       ; %bb.0: ; %entry
3453; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
3454; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3455; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3456; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3457; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3458; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
3459; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
3460; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3461; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
3462; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3463; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3464; GFX11-WGP-NEXT:    buffer_gl1_inv
3465; GFX11-WGP-NEXT:    buffer_gl0_inv
3466; GFX11-WGP-NEXT:    s_endpgm
3467;
3468; GFX11-CU-LABEL: global_system_acquire_monotonic_cmpxchg:
3469; GFX11-CU:       ; %bb.0: ; %entry
3470; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3471; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3472; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3473; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3474; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3475; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
3476; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
3477; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3478; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
3479; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3480; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3481; GFX11-CU-NEXT:    buffer_gl1_inv
3482; GFX11-CU-NEXT:    buffer_gl0_inv
3483; GFX11-CU-NEXT:    s_endpgm
3484;
3485; GFX12-WGP-LABEL: global_system_acquire_monotonic_cmpxchg:
3486; GFX12-WGP:       ; %bb.0: ; %entry
3487; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3488; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3489; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3490; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3491; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
3492; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
3493; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
3494; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3495; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
3496; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
3497; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
3498; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
3499; GFX12-WGP-NEXT:    s_endpgm
3500;
3501; GFX12-CU-LABEL: global_system_acquire_monotonic_cmpxchg:
3502; GFX12-CU:       ; %bb.0: ; %entry
3503; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
3504; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3505; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3506; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3507; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
3508; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
3509; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
3510; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3511; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
3512; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
3513; GFX12-CU-NEXT:    s_wait_storecnt 0x0
3514; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
3515; GFX12-CU-NEXT:    s_endpgm
3516    ptr addrspace(1) %out, i32 %in, i32 %old) {
3517entry:
3518  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
3519  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire monotonic
3520  ret void
3521}
3522
; Kernel under test: a volatile cmpxchg on global memory (addrspace(1)) with
; release success ordering and monotonic failure ordering. The instruction
; names no syncscope, which per the LLVM LangRef selects the default (system)
; scope. Arguments are the base pointer %out, the new value %in and the
; expected value %old.
;
; The expectations below are autogenerated (see the first line of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing them
; by hand. For every run configuration they place the waits (preceded by a
; cache write-back, buffer_wbl2 or global_wb, on the gfx90a, gfx940 and gfx1200
; runs) before the atomic, and expect s_endpgm directly after the atomic.
3523define amdgpu_kernel void @global_system_release_monotonic_cmpxchg(
3524; GFX6-LABEL: global_system_release_monotonic_cmpxchg:
3525; GFX6:       ; %bb.0: ; %entry
3526; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3527; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3528; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3529; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3530; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3531; GFX6-NEXT:    s_mov_b32 s12, s5
3532; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3533; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3534; GFX6-NEXT:    s_mov_b32 s11, -1
3535; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
3536; GFX6-NEXT:    s_mov_b32 s5, s12
3537; GFX6-NEXT:    s_mov_b32 s6, s11
3538; GFX6-NEXT:    s_mov_b32 s7, s10
3539; GFX6-NEXT:    v_mov_b32_e32 v0, s9
3540; GFX6-NEXT:    v_mov_b32_e32 v2, s8
3541; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3542; GFX6-NEXT:    v_mov_b32_e32 v1, v2
3543; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3544; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3545; GFX6-NEXT:    s_endpgm
3546;
3547; GFX7-LABEL: global_system_release_monotonic_cmpxchg:
3548; GFX7:       ; %bb.0: ; %entry
3549; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
3550; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3551; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
3552; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
3553; GFX7-NEXT:    s_mov_b64 s[10:11], 16
3554; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
3555; GFX7-NEXT:    s_mov_b32 s4, s8
3556; GFX7-NEXT:    s_mov_b32 s5, s9
3557; GFX7-NEXT:    s_mov_b32 s9, s10
3558; GFX7-NEXT:    s_mov_b32 s8, s11
3559; GFX7-NEXT:    s_add_u32 s4, s4, s9
3560; GFX7-NEXT:    s_addc_u32 s8, s5, s8
3561; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
3562; GFX7-NEXT:    s_mov_b32 s5, s8
3563; GFX7-NEXT:    v_mov_b32_e32 v2, s7
3564; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3565; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3566; GFX7-NEXT:    v_mov_b32_e32 v3, v0
3567; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3568; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3569; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3570; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
3571; GFX7-NEXT:    s_endpgm
3572;
3573; GFX10-WGP-LABEL: global_system_release_monotonic_cmpxchg:
3574; GFX10-WGP:       ; %bb.0: ; %entry
3575; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
3576; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3577; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
3578; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
3579; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3580; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
3581; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
3582; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3583; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
3584; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3585; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3586; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3587; GFX10-WGP-NEXT:    s_endpgm
3588;
3589; GFX10-CU-LABEL: global_system_release_monotonic_cmpxchg:
3590; GFX10-CU:       ; %bb.0: ; %entry
3591; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
3592; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3593; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
3594; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
3595; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
3596; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
3597; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
3598; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3599; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
3600; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3601; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3602; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3603; GFX10-CU-NEXT:    s_endpgm
3604;
3605; SKIP-CACHE-INV-LABEL: global_system_release_monotonic_cmpxchg:
3606; SKIP-CACHE-INV:       ; %bb.0: ; %entry
3607; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
3608; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
3609; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
3610; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
3611; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
3612; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
3613; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
3614; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
3615; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
3616; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
3617; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
3618; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
3619; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
3620; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
3621; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
3622; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3623; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
3624; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3625; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3626; SKIP-CACHE-INV-NEXT:    s_endpgm
3627;
3628; GFX90A-NOTTGSPLIT-LABEL: global_system_release_monotonic_cmpxchg:
3629; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
3630; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3631; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3632; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3633; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3634; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3635; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3636; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3637; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3638; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3639; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
3640; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3641; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3642; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
3643;
3644; GFX90A-TGSPLIT-LABEL: global_system_release_monotonic_cmpxchg:
3645; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
3646; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3647; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3648; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3649; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3650; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3651; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3652; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3653; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3654; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3655; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
3656; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3657; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3658; GFX90A-TGSPLIT-NEXT:    s_endpgm
3659;
3660; GFX940-NOTTGSPLIT-LABEL: global_system_release_monotonic_cmpxchg:
3661; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
3662; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3663; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3664; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3665; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3666; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3667; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3668; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3669; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3670; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3671; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
3672; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3673; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
3674; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
3675;
3676; GFX940-TGSPLIT-LABEL: global_system_release_monotonic_cmpxchg:
3677; GFX940-TGSPLIT:       ; %bb.0: ; %entry
3678; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3679; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3680; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3681; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3682; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3683; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3684; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3685; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3686; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3687; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
3688; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3689; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
3690; GFX940-TGSPLIT-NEXT:    s_endpgm
3691;
3692; GFX11-WGP-LABEL: global_system_release_monotonic_cmpxchg:
3693; GFX11-WGP:       ; %bb.0: ; %entry
3694; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
3695; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3696; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3697; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3698; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3699; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
3700; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
3701; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3702; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
3703; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3704; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3705; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3706; GFX11-WGP-NEXT:    s_endpgm
3707;
3708; GFX11-CU-LABEL: global_system_release_monotonic_cmpxchg:
3709; GFX11-CU:       ; %bb.0: ; %entry
3710; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3711; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3712; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3713; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3714; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3715; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
3716; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
3717; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3718; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
3719; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3720; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3721; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3722; GFX11-CU-NEXT:    s_endpgm
3723;
3724; GFX12-WGP-LABEL: global_system_release_monotonic_cmpxchg:
3725; GFX12-WGP:       ; %bb.0: ; %entry
3726; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3727; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3728; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3729; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3730; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
3731; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
3732; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
3733; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3734; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
3735; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
3736; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
3737; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
3738; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
3739; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
3740; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
3741; GFX12-WGP-NEXT:    s_endpgm
3742;
3743; GFX12-CU-LABEL: global_system_release_monotonic_cmpxchg:
3744; GFX12-CU:       ; %bb.0: ; %entry
3745; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
3746; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3747; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3748; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3749; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
3750; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
3751; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
3752; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3753; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
3754; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
3755; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
3756; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
3757; GFX12-CU-NEXT:    s_wait_storecnt 0x0
3758; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
3759; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
3760; GFX12-CU-NEXT:    s_endpgm
3761    ptr addrspace(1) %out, i32 %in, i32 %old) {
3762entry:
  ; Element 4 of an i32 array is a 16-byte displacement from %out. The
  ; expectations above show it as offset:16 on the buffer and global atomics,
  ; and as an explicit 64-bit add of 16 ahead of the flat atomic on gfx700.
3763  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old, store %in on a match. %val is never read.
3764  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release monotonic
3765  ret void
3766}
3767
3768define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
3769; GFX6-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3770; GFX6:       ; %bb.0: ; %entry
3771; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
3772; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
3773; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
3774; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
3775; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
3776; GFX6-NEXT:    s_mov_b32 s12, s5
3777; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
3778; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
3779; GFX6-NEXT:    s_mov_b32 s11, -1
3780; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
3781; GFX6-NEXT:    s_mov_b32 s5, s12
3782; GFX6-NEXT:    s_mov_b32 s6, s11
3783; GFX6-NEXT:    s_mov_b32 s7, s10
3784; GFX6-NEXT:    v_mov_b32_e32 v0, s9
3785; GFX6-NEXT:    v_mov_b32_e32 v2, s8
3786; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3787; GFX6-NEXT:    v_mov_b32_e32 v1, v2
3788; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3789; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
3790; GFX6-NEXT:    s_waitcnt vmcnt(0)
3791; GFX6-NEXT:    buffer_wbinvl1
3792; GFX6-NEXT:    s_endpgm
3793;
3794; GFX7-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3795; GFX7:       ; %bb.0: ; %entry
3796; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
3797; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3798; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
3799; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
3800; GFX7-NEXT:    s_mov_b64 s[10:11], 16
3801; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
3802; GFX7-NEXT:    s_mov_b32 s4, s8
3803; GFX7-NEXT:    s_mov_b32 s5, s9
3804; GFX7-NEXT:    s_mov_b32 s9, s10
3805; GFX7-NEXT:    s_mov_b32 s8, s11
3806; GFX7-NEXT:    s_add_u32 s4, s4, s9
3807; GFX7-NEXT:    s_addc_u32 s8, s5, s8
3808; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
3809; GFX7-NEXT:    s_mov_b32 s5, s8
3810; GFX7-NEXT:    v_mov_b32_e32 v2, s7
3811; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3812; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3813; GFX7-NEXT:    v_mov_b32_e32 v3, v0
3814; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3815; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3816; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3817; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
3818; GFX7-NEXT:    s_waitcnt vmcnt(0)
3819; GFX7-NEXT:    buffer_wbinvl1_vol
3820; GFX7-NEXT:    s_endpgm
3821;
3822; GFX10-WGP-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3823; GFX10-WGP:       ; %bb.0: ; %entry
3824; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
3825; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3826; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
3827; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
3828; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3829; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
3830; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
3831; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3832; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
3833; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3834; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3835; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3836; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3837; GFX10-WGP-NEXT:    buffer_gl1_inv
3838; GFX10-WGP-NEXT:    buffer_gl0_inv
3839; GFX10-WGP-NEXT:    s_endpgm
3840;
3841; GFX10-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3842; GFX10-CU:       ; %bb.0: ; %entry
3843; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
3844; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3845; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
3846; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
3847; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
3848; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
3849; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
3850; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3851; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
3852; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3853; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3854; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
3855; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3856; GFX10-CU-NEXT:    buffer_gl1_inv
3857; GFX10-CU-NEXT:    buffer_gl0_inv
3858; GFX10-CU-NEXT:    s_endpgm
3859;
3860; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3861; SKIP-CACHE-INV:       ; %bb.0: ; %entry
3862; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
3863; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
3864; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
3865; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
3866; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
3867; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
3868; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
3869; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
3870; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
3871; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
3872; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
3873; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
3874; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
3875; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
3876; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
3877; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
3878; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
3879; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3880; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
3881; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
3882; SKIP-CACHE-INV-NEXT:    s_endpgm
3883;
3884; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3885; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
3886; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3887; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3888; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3889; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3890; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3891; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3892; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3893; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3894; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3895; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
3896; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3897; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3898; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3899; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
3900; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
3901; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
3902;
3903; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3904; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
3905; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3906; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
3907; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
3908; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
3909; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3910; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
3911; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
3912; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3913; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3914; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
3915; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3916; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
3917; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3918; GFX90A-TGSPLIT-NEXT:    buffer_invl2
3919; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
3920; GFX90A-TGSPLIT-NEXT:    s_endpgm
3921;
3922; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3923; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
3924; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3925; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3926; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3927; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3928; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3929; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3930; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3931; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3932; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3933; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
3934; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3935; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
3936; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3937; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
3938; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
3939;
3940; GFX940-TGSPLIT-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3941; GFX940-TGSPLIT:       ; %bb.0: ; %entry
3942; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
3943; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
3944; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
3945; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
3946; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
3947; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
3948; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
3949; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
3950; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
3951; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
3952; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3953; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
3954; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
3955; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
3956; GFX940-TGSPLIT-NEXT:    s_endpgm
3957;
3958; GFX11-WGP-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3959; GFX11-WGP:       ; %bb.0: ; %entry
3960; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
3961; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3962; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
3963; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
3964; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
3965; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
3966; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
3967; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3968; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
3969; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3970; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3971; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3972; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
3973; GFX11-WGP-NEXT:    buffer_gl1_inv
3974; GFX11-WGP-NEXT:    buffer_gl0_inv
3975; GFX11-WGP-NEXT:    s_endpgm
3976;
3977; GFX11-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3978; GFX11-CU:       ; %bb.0: ; %entry
3979; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
3980; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
3981; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
3982; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
3983; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
3984; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
3985; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
3986; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
3987; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
3988; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3989; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3990; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
3991; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
3992; GFX11-CU-NEXT:    buffer_gl1_inv
3993; GFX11-CU-NEXT:    buffer_gl0_inv
3994; GFX11-CU-NEXT:    s_endpgm
3995;
3996; GFX12-WGP-LABEL: global_system_acq_rel_monotonic_cmpxchg:
3997; GFX12-WGP:       ; %bb.0: ; %entry
3998; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
3999; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4000; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4001; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4002; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
4003; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
4004; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
4005; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4006; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
4007; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
4008; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
4009; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
4010; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4011; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
4012; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
4013; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4014; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
4015; GFX12-WGP-NEXT:    s_endpgm
4016;
4017; GFX12-CU-LABEL: global_system_acq_rel_monotonic_cmpxchg:
4018; GFX12-CU:       ; %bb.0: ; %entry
4019; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
4020; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4021; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4022; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4023; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
4024; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
4025; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
4026; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4027; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
4028; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
4029; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
4030; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
4031; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4032; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
4033; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
4034; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4035; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
4036; GFX12-CU-NEXT:    s_endpgm
4037    ptr addrspace(1) %out, i32 %in, i32 %old) {
4038entry:
4039  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
4040  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel monotonic
4041  ret void
4042}
4043
; Kernel under test. It performs a volatile cmpxchg on global memory
; (addrspace 1) with seq_cst success ordering and monotonic failure ordering.
; No syncscope is written on the instruction, so the default (system) scope
; applies, which matches the "system" in the function name.
;
; The assertion lines between the define line and the parameter list are
; autogenerated by utils/update_llc_test_checks.py (see the NOTE on the first
; line of the file). Regenerate them with that script instead of editing them
; by hand.
;
; Shape of the expected output, as recorded below. Every run line except the
; SKIP-CACHE-INV one expects a wait before the cmpswap, then the cmpswap, then
; a second wait followed by one or more cache invalidate instructions. The
; GFX90A, GFX940 and GFX12 variants additionally expect a buffer_wbl2 or
; global_wb instruction ahead of the first wait. The SKIP-CACHE-INV run passes
; -amdgcn-skip-cache-invalidations and its expected output stops at the second
; wait with no invalidate.
4044define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
4045; GFX6-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4046; GFX6:       ; %bb.0: ; %entry
4047; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
4048; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
4049; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
4050; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
4051; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4052; GFX6-NEXT:    s_mov_b32 s12, s5
4053; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
4054; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
4055; GFX6-NEXT:    s_mov_b32 s11, -1
4056; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
4057; GFX6-NEXT:    s_mov_b32 s5, s12
4058; GFX6-NEXT:    s_mov_b32 s6, s11
4059; GFX6-NEXT:    s_mov_b32 s7, s10
4060; GFX6-NEXT:    v_mov_b32_e32 v0, s9
4061; GFX6-NEXT:    v_mov_b32_e32 v2, s8
4062; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4063; GFX6-NEXT:    v_mov_b32_e32 v1, v2
4064; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4065; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4066; GFX6-NEXT:    s_waitcnt vmcnt(0)
4067; GFX6-NEXT:    buffer_wbinvl1
4068; GFX6-NEXT:    s_endpgm
4069;
4070; GFX7-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4071; GFX7:       ; %bb.0: ; %entry
4072; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
4073; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4074; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
4075; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
4076; GFX7-NEXT:    s_mov_b64 s[10:11], 16
4077; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4078; GFX7-NEXT:    s_mov_b32 s4, s8
4079; GFX7-NEXT:    s_mov_b32 s5, s9
4080; GFX7-NEXT:    s_mov_b32 s9, s10
4081; GFX7-NEXT:    s_mov_b32 s8, s11
4082; GFX7-NEXT:    s_add_u32 s4, s4, s9
4083; GFX7-NEXT:    s_addc_u32 s8, s5, s8
4084; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
4085; GFX7-NEXT:    s_mov_b32 s5, s8
4086; GFX7-NEXT:    v_mov_b32_e32 v2, s7
4087; GFX7-NEXT:    v_mov_b32_e32 v0, s6
4088; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4089; GFX7-NEXT:    v_mov_b32_e32 v3, v0
4090; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4091; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4092; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4093; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
4094; GFX7-NEXT:    s_waitcnt vmcnt(0)
4095; GFX7-NEXT:    buffer_wbinvl1_vol
4096; GFX7-NEXT:    s_endpgm
4097;
4098; GFX10-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4099; GFX10-WGP:       ; %bb.0: ; %entry
4100; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
4101; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4102; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
4103; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
4104; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4105; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
4106; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
4107; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4108; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
4109; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4110; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4111; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4112; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4113; GFX10-WGP-NEXT:    buffer_gl1_inv
4114; GFX10-WGP-NEXT:    buffer_gl0_inv
4115; GFX10-WGP-NEXT:    s_endpgm
4116;
4117; GFX10-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4118; GFX10-CU:       ; %bb.0: ; %entry
4119; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
4120; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4121; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
4122; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
4123; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
4124; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
4125; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
4126; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4127; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
4128; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4129; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4130; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4131; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4132; GFX10-CU-NEXT:    buffer_gl1_inv
4133; GFX10-CU-NEXT:    buffer_gl0_inv
4134; GFX10-CU-NEXT:    s_endpgm
4135;
4136; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4137; SKIP-CACHE-INV:       ; %bb.0: ; %entry
4138; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
4139; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
4140; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
4141; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
4142; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
4143; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
4144; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
4145; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
4146; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
4147; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
4148; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
4149; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
4150; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
4151; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
4152; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
4153; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4154; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
4155; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4156; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4157; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
4158; SKIP-CACHE-INV-NEXT:    s_endpgm
4159;
4160; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4161; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
4162; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4163; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4164; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4165; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4166; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4167; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4168; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4169; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4170; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4171; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
4172; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4173; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4174; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4175; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
4176; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
4177; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
4178;
4179; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4180; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
4181; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4182; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4183; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4184; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4185; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4186; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4187; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4188; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4189; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4190; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
4191; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4192; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4193; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4194; GFX90A-TGSPLIT-NEXT:    buffer_invl2
4195; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
4196; GFX90A-TGSPLIT-NEXT:    s_endpgm
4197;
4198; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4199; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
4200; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4201; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4202; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4203; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4204; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4205; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4206; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4207; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4208; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4209; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
4210; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4211; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
4212; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4213; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
4214; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
4215;
4216; GFX940-TGSPLIT-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4217; GFX940-TGSPLIT:       ; %bb.0: ; %entry
4218; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4219; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4220; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4221; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4222; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4223; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4224; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4225; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4226; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4227; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
4228; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4229; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
4230; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4231; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
4232; GFX940-TGSPLIT-NEXT:    s_endpgm
4233;
4234; GFX11-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4235; GFX11-WGP:       ; %bb.0: ; %entry
4236; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
4237; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4238; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4239; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4240; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4241; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
4242; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
4243; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4244; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
4245; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4246; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4247; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4248; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4249; GFX11-WGP-NEXT:    buffer_gl1_inv
4250; GFX11-WGP-NEXT:    buffer_gl0_inv
4251; GFX11-WGP-NEXT:    s_endpgm
4252;
4253; GFX11-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4254; GFX11-CU:       ; %bb.0: ; %entry
4255; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
4256; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4257; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4258; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4259; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
4260; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
4261; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
4262; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4263; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
4264; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4265; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4266; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4267; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4268; GFX11-CU-NEXT:    buffer_gl1_inv
4269; GFX11-CU-NEXT:    buffer_gl0_inv
4270; GFX11-CU-NEXT:    s_endpgm
4271;
4272; GFX12-WGP-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4273; GFX12-WGP:       ; %bb.0: ; %entry
4274; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
4275; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4276; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4277; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4278; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
4279; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
4280; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
4281; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4282; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
4283; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
4284; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
4285; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
4286; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4287; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
4288; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
4289; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4290; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
4291; GFX12-WGP-NEXT:    s_endpgm
4292;
4293; GFX12-CU-LABEL: global_system_seq_cst_monotonic_cmpxchg:
4294; GFX12-CU:       ; %bb.0: ; %entry
4295; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
4296; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4297; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4298; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4299; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
4300; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
4301; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
4302; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4303; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
4304; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
4305; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
4306; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
4307; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4308; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
4309; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
4310; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4311; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
4312; GFX12-CU-NEXT:    s_endpgm
4313    ptr addrspace(1) %out, i32 %in, i32 %old) {
4314entry:
  ; Element 4 of an i32 array is 16 bytes past %out. That 16 is what shows up
  ; in the expected output, either as the offset operand of the cmpswap or, for
  ; GFX7, as an explicit 64-bit add into the address registers.
4315  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; %old is the compare value and %in is the replacement. %val is not used
  ; afterwards, so only the emitted memory operation and the wait and cache
  ; instructions around it are being checked.
4316  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst monotonic
4317  ret void
4318}
4319
; Kernel under test. It performs a volatile cmpxchg on global memory
; (addrspace 1) with monotonic success ordering and acquire failure ordering.
; No syncscope is written on the instruction, so the default (system) scope
; applies, which matches the "system" in the function name.
;
; The assertion lines between the define line and the parameter list are
; autogenerated by utils/update_llc_test_checks.py (see the NOTE on the first
; line of the file). Regenerate them with that script instead of editing them
; by hand.
;
; Shape of the expected output, as recorded below. No run line expects a wait
; or write-back directly ahead of the cmpswap. Every run line except the
; SKIP-CACHE-INV one expects the cmpswap to be followed by a wait and then one
; or more cache invalidate instructions. The SKIP-CACHE-INV run passes
; -amdgcn-skip-cache-invalidations and its expected output stops at the wait
; with no invalidate.
4320define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg(
4321; GFX6-LABEL: global_system_monotonic_acquire_cmpxchg:
4322; GFX6:       ; %bb.0: ; %entry
4323; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
4324; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
4325; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
4326; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
4327; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4328; GFX6-NEXT:    s_mov_b32 s12, s5
4329; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
4330; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
4331; GFX6-NEXT:    s_mov_b32 s11, -1
4332; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
4333; GFX6-NEXT:    s_mov_b32 s5, s12
4334; GFX6-NEXT:    s_mov_b32 s6, s11
4335; GFX6-NEXT:    s_mov_b32 s7, s10
4336; GFX6-NEXT:    v_mov_b32_e32 v0, s9
4337; GFX6-NEXT:    v_mov_b32_e32 v2, s8
4338; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4339; GFX6-NEXT:    v_mov_b32_e32 v1, v2
4340; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4341; GFX6-NEXT:    s_waitcnt vmcnt(0)
4342; GFX6-NEXT:    buffer_wbinvl1
4343; GFX6-NEXT:    s_endpgm
4344;
4345; GFX7-LABEL: global_system_monotonic_acquire_cmpxchg:
4346; GFX7:       ; %bb.0: ; %entry
4347; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
4348; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4349; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
4350; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
4351; GFX7-NEXT:    s_mov_b64 s[10:11], 16
4352; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4353; GFX7-NEXT:    s_mov_b32 s4, s8
4354; GFX7-NEXT:    s_mov_b32 s5, s9
4355; GFX7-NEXT:    s_mov_b32 s9, s10
4356; GFX7-NEXT:    s_mov_b32 s8, s11
4357; GFX7-NEXT:    s_add_u32 s4, s4, s9
4358; GFX7-NEXT:    s_addc_u32 s8, s5, s8
4359; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
4360; GFX7-NEXT:    s_mov_b32 s5, s8
4361; GFX7-NEXT:    v_mov_b32_e32 v2, s7
4362; GFX7-NEXT:    v_mov_b32_e32 v0, s6
4363; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4364; GFX7-NEXT:    v_mov_b32_e32 v3, v0
4365; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4366; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4367; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
4368; GFX7-NEXT:    s_waitcnt vmcnt(0)
4369; GFX7-NEXT:    buffer_wbinvl1_vol
4370; GFX7-NEXT:    s_endpgm
4371;
4372; GFX10-WGP-LABEL: global_system_monotonic_acquire_cmpxchg:
4373; GFX10-WGP:       ; %bb.0: ; %entry
4374; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
4375; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4376; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
4377; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
4378; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4379; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
4380; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
4381; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4382; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
4383; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4384; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4385; GFX10-WGP-NEXT:    buffer_gl1_inv
4386; GFX10-WGP-NEXT:    buffer_gl0_inv
4387; GFX10-WGP-NEXT:    s_endpgm
4388;
4389; GFX10-CU-LABEL: global_system_monotonic_acquire_cmpxchg:
4390; GFX10-CU:       ; %bb.0: ; %entry
4391; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
4392; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4393; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
4394; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
4395; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
4396; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
4397; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
4398; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4399; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
4400; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4401; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4402; GFX10-CU-NEXT:    buffer_gl1_inv
4403; GFX10-CU-NEXT:    buffer_gl0_inv
4404; GFX10-CU-NEXT:    s_endpgm
4405;
4406; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_cmpxchg:
4407; SKIP-CACHE-INV:       ; %bb.0: ; %entry
4408; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
4409; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
4410; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
4411; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
4412; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
4413; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
4414; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
4415; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
4416; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
4417; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
4418; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
4419; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
4420; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
4421; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
4422; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
4423; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4424; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
4425; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4426; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
4427; SKIP-CACHE-INV-NEXT:    s_endpgm
4428;
4429; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg:
4430; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
4431; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4432; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4433; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4434; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4435; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4436; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4437; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4438; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4439; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4440; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4441; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4442; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
4443; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
4444; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
4445;
4446; GFX90A-TGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg:
4447; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
4448; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4449; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4450; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4451; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4452; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4453; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4454; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4455; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4456; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4457; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4458; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4459; GFX90A-TGSPLIT-NEXT:    buffer_invl2
4460; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
4461; GFX90A-TGSPLIT-NEXT:    s_endpgm
4462;
4463; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg:
4464; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
4465; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4466; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4467; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4468; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4469; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4470; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4471; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4472; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4473; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4474; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
4475; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4476; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
4477; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
4478;
4479; GFX940-TGSPLIT-LABEL: global_system_monotonic_acquire_cmpxchg:
4480; GFX940-TGSPLIT:       ; %bb.0: ; %entry
4481; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4482; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4483; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4484; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4485; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4486; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4487; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4488; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4489; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4490; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
4491; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4492; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
4493; GFX940-TGSPLIT-NEXT:    s_endpgm
4494;
4495; GFX11-WGP-LABEL: global_system_monotonic_acquire_cmpxchg:
4496; GFX11-WGP:       ; %bb.0: ; %entry
4497; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
4498; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4499; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4500; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4501; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4502; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
4503; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
4504; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4505; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
4506; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4507; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4508; GFX11-WGP-NEXT:    buffer_gl1_inv
4509; GFX11-WGP-NEXT:    buffer_gl0_inv
4510; GFX11-WGP-NEXT:    s_endpgm
4511;
4512; GFX11-CU-LABEL: global_system_monotonic_acquire_cmpxchg:
4513; GFX11-CU:       ; %bb.0: ; %entry
4514; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
4515; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4516; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4517; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4518; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
4519; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
4520; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
4521; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4522; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
4523; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4524; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4525; GFX11-CU-NEXT:    buffer_gl1_inv
4526; GFX11-CU-NEXT:    buffer_gl0_inv
4527; GFX11-CU-NEXT:    s_endpgm
4528;
4529; GFX12-WGP-LABEL: global_system_monotonic_acquire_cmpxchg:
4530; GFX12-WGP:       ; %bb.0: ; %entry
4531; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
4532; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4533; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4534; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4535; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
4536; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
4537; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
4538; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4539; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
4540; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
4541; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4542; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
4543; GFX12-WGP-NEXT:    s_endpgm
4544;
4545; GFX12-CU-LABEL: global_system_monotonic_acquire_cmpxchg:
4546; GFX12-CU:       ; %bb.0: ; %entry
4547; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
4548; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4549; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4550; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4551; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
4552; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
4553; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
4554; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4555; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
4556; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
4557; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4558; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
4559; GFX12-CU-NEXT:    s_endpgm
4560    ptr addrspace(1) %out, i32 %in, i32 %old) {
4561entry:
  ; Element 4 of an i32 array is 16 bytes past %out. That 16 is what shows up
  ; in the expected output, either as the offset operand of the cmpswap or, for
  ; GFX7, as an explicit 64-bit add into the address registers.
4562  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; %old is the compare value and %in is the replacement. %val is not used
  ; afterwards, so only the emitted memory operation and the wait and cache
  ; instructions that follow it are being checked.
4563  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic acquire
4564  ret void
4565}
4566
4567define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
4568; GFX6-LABEL: global_system_acquire_acquire_cmpxchg:
4569; GFX6:       ; %bb.0: ; %entry
4570; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
4571; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
4572; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
4573; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
4574; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4575; GFX6-NEXT:    s_mov_b32 s12, s5
4576; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
4577; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
4578; GFX6-NEXT:    s_mov_b32 s11, -1
4579; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
4580; GFX6-NEXT:    s_mov_b32 s5, s12
4581; GFX6-NEXT:    s_mov_b32 s6, s11
4582; GFX6-NEXT:    s_mov_b32 s7, s10
4583; GFX6-NEXT:    v_mov_b32_e32 v0, s9
4584; GFX6-NEXT:    v_mov_b32_e32 v2, s8
4585; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4586; GFX6-NEXT:    v_mov_b32_e32 v1, v2
4587; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4588; GFX6-NEXT:    s_waitcnt vmcnt(0)
4589; GFX6-NEXT:    buffer_wbinvl1
4590; GFX6-NEXT:    s_endpgm
4591;
4592; GFX7-LABEL: global_system_acquire_acquire_cmpxchg:
4593; GFX7:       ; %bb.0: ; %entry
4594; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
4595; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4596; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
4597; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
4598; GFX7-NEXT:    s_mov_b64 s[10:11], 16
4599; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4600; GFX7-NEXT:    s_mov_b32 s4, s8
4601; GFX7-NEXT:    s_mov_b32 s5, s9
4602; GFX7-NEXT:    s_mov_b32 s9, s10
4603; GFX7-NEXT:    s_mov_b32 s8, s11
4604; GFX7-NEXT:    s_add_u32 s4, s4, s9
4605; GFX7-NEXT:    s_addc_u32 s8, s5, s8
4606; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
4607; GFX7-NEXT:    s_mov_b32 s5, s8
4608; GFX7-NEXT:    v_mov_b32_e32 v2, s7
4609; GFX7-NEXT:    v_mov_b32_e32 v0, s6
4610; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4611; GFX7-NEXT:    v_mov_b32_e32 v3, v0
4612; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4613; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4614; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
4615; GFX7-NEXT:    s_waitcnt vmcnt(0)
4616; GFX7-NEXT:    buffer_wbinvl1_vol
4617; GFX7-NEXT:    s_endpgm
4618;
4619; GFX10-WGP-LABEL: global_system_acquire_acquire_cmpxchg:
4620; GFX10-WGP:       ; %bb.0: ; %entry
4621; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
4622; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4623; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
4624; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
4625; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4626; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
4627; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
4628; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4629; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
4630; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4631; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4632; GFX10-WGP-NEXT:    buffer_gl1_inv
4633; GFX10-WGP-NEXT:    buffer_gl0_inv
4634; GFX10-WGP-NEXT:    s_endpgm
4635;
4636; GFX10-CU-LABEL: global_system_acquire_acquire_cmpxchg:
4637; GFX10-CU:       ; %bb.0: ; %entry
4638; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
4639; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4640; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
4641; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
4642; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
4643; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
4644; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
4645; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4646; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
4647; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4648; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4649; GFX10-CU-NEXT:    buffer_gl1_inv
4650; GFX10-CU-NEXT:    buffer_gl0_inv
4651; GFX10-CU-NEXT:    s_endpgm
4652;
4653; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_cmpxchg:
4654; SKIP-CACHE-INV:       ; %bb.0: ; %entry
4655; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
4656; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
4657; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
4658; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
4659; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
4660; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
4661; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
4662; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
4663; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
4664; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
4665; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
4666; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
4667; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
4668; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
4669; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
4670; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4671; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
4672; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4673; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
4674; SKIP-CACHE-INV-NEXT:    s_endpgm
4675;
4676; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg:
4677; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
4678; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4679; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4680; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4681; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4682; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4683; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4684; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4685; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4686; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4687; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4688; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4689; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
4690; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
4691; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
4692;
4693; GFX90A-TGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg:
4694; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
4695; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4696; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4697; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4698; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4699; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4700; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4701; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4702; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4703; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4704; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4705; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4706; GFX90A-TGSPLIT-NEXT:    buffer_invl2
4707; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
4708; GFX90A-TGSPLIT-NEXT:    s_endpgm
4709;
4710; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg:
4711; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
4712; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4713; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4714; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4715; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4716; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4717; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4718; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4719; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4720; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4721; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
4722; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4723; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
4724; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
4725;
4726; GFX940-TGSPLIT-LABEL: global_system_acquire_acquire_cmpxchg:
4727; GFX940-TGSPLIT:       ; %bb.0: ; %entry
4728; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4729; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4730; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4731; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4732; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4733; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4734; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4735; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4736; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4737; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
4738; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4739; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
4740; GFX940-TGSPLIT-NEXT:    s_endpgm
4741;
4742; GFX11-WGP-LABEL: global_system_acquire_acquire_cmpxchg:
4743; GFX11-WGP:       ; %bb.0: ; %entry
4744; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
4745; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4746; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4747; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4748; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4749; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
4750; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
4751; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4752; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
4753; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4754; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4755; GFX11-WGP-NEXT:    buffer_gl1_inv
4756; GFX11-WGP-NEXT:    buffer_gl0_inv
4757; GFX11-WGP-NEXT:    s_endpgm
4758;
4759; GFX11-CU-LABEL: global_system_acquire_acquire_cmpxchg:
4760; GFX11-CU:       ; %bb.0: ; %entry
4761; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
4762; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4763; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4764; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4765; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
4766; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
4767; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
4768; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4769; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
4770; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
4771; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4772; GFX11-CU-NEXT:    buffer_gl1_inv
4773; GFX11-CU-NEXT:    buffer_gl0_inv
4774; GFX11-CU-NEXT:    s_endpgm
4775;
4776; GFX12-WGP-LABEL: global_system_acquire_acquire_cmpxchg:
4777; GFX12-WGP:       ; %bb.0: ; %entry
4778; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
4779; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4780; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
4781; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
4782; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
4783; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
4784; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
4785; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4786; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
4787; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
4788; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
4789; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
4790; GFX12-WGP-NEXT:    s_endpgm
4791;
4792; GFX12-CU-LABEL: global_system_acquire_acquire_cmpxchg:
4793; GFX12-CU:       ; %bb.0: ; %entry
4794; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
4795; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
4796; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
4797; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
4798; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
4799; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
4800; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
4801; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4802; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
4803; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
4804; GFX12-CU-NEXT:    s_wait_storecnt 0x0
4805; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
4806; GFX12-CU-NEXT:    s_endpgm
4807    ptr addrspace(1) %out, i32 %in, i32 %old) {
4808entry:
4809  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
4810  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire acquire
4811  ret void
4812}
4813
; Kernel exercising a volatile cmpxchg on global memory (addrspace(1)) with no
; explicit syncscope, `release` success ordering and `acquire` failure
; ordering. The access targets element 4 of the i32 array at %out and the
; loaded result (%val) is never used. The assertion lines embedded in the
; signature below are autogenerated (see the NOTE at the top of the file), so
; regenerate them with the script rather than editing them by hand.
4814define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
4815; GFX6-LABEL: global_system_release_acquire_cmpxchg:
4816; GFX6:       ; %bb.0: ; %entry
4817; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
4818; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
4819; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
4820; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
4821; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
4822; GFX6-NEXT:    s_mov_b32 s12, s5
4823; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
4824; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
4825; GFX6-NEXT:    s_mov_b32 s11, -1
4826; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
4827; GFX6-NEXT:    s_mov_b32 s5, s12
4828; GFX6-NEXT:    s_mov_b32 s6, s11
4829; GFX6-NEXT:    s_mov_b32 s7, s10
4830; GFX6-NEXT:    v_mov_b32_e32 v0, s9
4831; GFX6-NEXT:    v_mov_b32_e32 v2, s8
4832; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4833; GFX6-NEXT:    v_mov_b32_e32 v1, v2
4834; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4835; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
4836; GFX6-NEXT:    s_waitcnt vmcnt(0)
4837; GFX6-NEXT:    buffer_wbinvl1
4838; GFX6-NEXT:    s_endpgm
4839;
4840; GFX7-LABEL: global_system_release_acquire_cmpxchg:
4841; GFX7:       ; %bb.0: ; %entry
4842; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
4843; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4844; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
4845; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
4846; GFX7-NEXT:    s_mov_b64 s[10:11], 16
4847; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4848; GFX7-NEXT:    s_mov_b32 s4, s8
4849; GFX7-NEXT:    s_mov_b32 s5, s9
4850; GFX7-NEXT:    s_mov_b32 s9, s10
4851; GFX7-NEXT:    s_mov_b32 s8, s11
4852; GFX7-NEXT:    s_add_u32 s4, s4, s9
4853; GFX7-NEXT:    s_addc_u32 s8, s5, s8
4854; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
4855; GFX7-NEXT:    s_mov_b32 s5, s8
4856; GFX7-NEXT:    v_mov_b32_e32 v2, s7
4857; GFX7-NEXT:    v_mov_b32_e32 v0, s6
4858; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4859; GFX7-NEXT:    v_mov_b32_e32 v3, v0
4860; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4861; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4862; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4863; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
4864; GFX7-NEXT:    s_waitcnt vmcnt(0)
4865; GFX7-NEXT:    buffer_wbinvl1_vol
4866; GFX7-NEXT:    s_endpgm
4867;
4868; GFX10-WGP-LABEL: global_system_release_acquire_cmpxchg:
4869; GFX10-WGP:       ; %bb.0: ; %entry
4870; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
4871; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4872; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
4873; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
4874; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
4875; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
4876; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
4877; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4878; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
4879; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4880; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4881; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4882; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
4883; GFX10-WGP-NEXT:    buffer_gl1_inv
4884; GFX10-WGP-NEXT:    buffer_gl0_inv
4885; GFX10-WGP-NEXT:    s_endpgm
4886;
4887; GFX10-CU-LABEL: global_system_release_acquire_cmpxchg:
4888; GFX10-CU:       ; %bb.0: ; %entry
4889; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
4890; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4891; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
4892; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
4893; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
4894; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
4895; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
4896; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
4897; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
4898; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4899; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4900; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
4901; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
4902; GFX10-CU-NEXT:    buffer_gl1_inv
4903; GFX10-CU-NEXT:    buffer_gl0_inv
4904; GFX10-CU-NEXT:    s_endpgm
4905;
4906; SKIP-CACHE-INV-LABEL: global_system_release_acquire_cmpxchg:
4907; SKIP-CACHE-INV:       ; %bb.0: ; %entry
4908; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
4909; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
4910; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
4911; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
4912; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
4913; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
4914; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
4915; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
4916; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
4917; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
4918; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
4919; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
4920; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
4921; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
4922; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
4923; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
4924; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
4925; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4926; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
4927; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
4928; SKIP-CACHE-INV-NEXT:    s_endpgm
4929;
4930; GFX90A-NOTTGSPLIT-LABEL: global_system_release_acquire_cmpxchg:
4931; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
4932; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4933; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4934; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4935; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4936; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4937; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4938; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4939; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4940; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4941; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
4942; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4943; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4944; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4945; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
4946; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
4947; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
4948;
4949; GFX90A-TGSPLIT-LABEL: global_system_release_acquire_cmpxchg:
4950; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
4951; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4952; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
4953; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
4954; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
4955; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4956; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
4957; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
4958; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4959; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4960; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
4961; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4962; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
4963; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4964; GFX90A-TGSPLIT-NEXT:    buffer_invl2
4965; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
4966; GFX90A-TGSPLIT-NEXT:    s_endpgm
4967;
4968; GFX940-NOTTGSPLIT-LABEL: global_system_release_acquire_cmpxchg:
4969; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
4970; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4971; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4972; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4973; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4974; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4975; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4976; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4977; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4978; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4979; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
4980; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4981; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
4982; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
4983; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
4984; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
4985;
4986; GFX940-TGSPLIT-LABEL: global_system_release_acquire_cmpxchg:
4987; GFX940-TGSPLIT:       ; %bb.0: ; %entry
4988; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
4989; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
4990; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
4991; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
4992; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
4993; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
4994; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
4995; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
4996; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
4997; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
4998; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4999; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
5000; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5001; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
5002; GFX940-TGSPLIT-NEXT:    s_endpgm
5003;
5004; GFX11-WGP-LABEL: global_system_release_acquire_cmpxchg:
5005; GFX11-WGP:       ; %bb.0: ; %entry
5006; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
5007; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5008; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5009; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5010; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5011; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
5012; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
5013; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5014; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
5015; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5016; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5017; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5018; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5019; GFX11-WGP-NEXT:    buffer_gl1_inv
5020; GFX11-WGP-NEXT:    buffer_gl0_inv
5021; GFX11-WGP-NEXT:    s_endpgm
5022;
5023; GFX11-CU-LABEL: global_system_release_acquire_cmpxchg:
5024; GFX11-CU:       ; %bb.0: ; %entry
5025; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
5026; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5027; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5028; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5029; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
5030; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
5031; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
5032; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5033; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
5034; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5035; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5036; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5037; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5038; GFX11-CU-NEXT:    buffer_gl1_inv
5039; GFX11-CU-NEXT:    buffer_gl0_inv
5040; GFX11-CU-NEXT:    s_endpgm
5041;
5042; GFX12-WGP-LABEL: global_system_release_acquire_cmpxchg:
5043; GFX12-WGP:       ; %bb.0: ; %entry
5044; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
5045; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5046; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5047; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5048; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
5049; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
5050; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
5051; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5052; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
5053; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
5054; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
5055; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
5056; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5057; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
5058; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
5059; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5060; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
5061; GFX12-WGP-NEXT:    s_endpgm
5062;
5063; GFX12-CU-LABEL: global_system_release_acquire_cmpxchg:
5064; GFX12-CU:       ; %bb.0: ; %entry
5065; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
5066; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5067; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5068; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5069; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
5070; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
5071; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
5072; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5073; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
5074; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
5075; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
5076; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
5077; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5078; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
5079; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
5080; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5081; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
5082; GFX12-CU-NEXT:    s_endpgm
5083    ptr addrspace(1) %out, i32 %in, i32 %old) {
5084entry:
  ; Index 4 of an i32 array, i.e. 16 bytes past %out; this is the 16-byte
  ; offset that appears in the expected instructions above.
5085  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the word at %gep with %old and, on a match, store %in.
  ; Orderings are success = release, failure = acquire; no syncscope is given.
5086  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release acquire
5087  ret void
5088}
5089
5090define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
5091; GFX6-LABEL: global_system_acq_rel_acquire_cmpxchg:
5092; GFX6:       ; %bb.0: ; %entry
5093; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
5094; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5095; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
5096; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
5097; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5098; GFX6-NEXT:    s_mov_b32 s12, s5
5099; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
5100; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
5101; GFX6-NEXT:    s_mov_b32 s11, -1
5102; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
5103; GFX6-NEXT:    s_mov_b32 s5, s12
5104; GFX6-NEXT:    s_mov_b32 s6, s11
5105; GFX6-NEXT:    s_mov_b32 s7, s10
5106; GFX6-NEXT:    v_mov_b32_e32 v0, s9
5107; GFX6-NEXT:    v_mov_b32_e32 v2, s8
5108; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5109; GFX6-NEXT:    v_mov_b32_e32 v1, v2
5110; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5111; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5112; GFX6-NEXT:    s_waitcnt vmcnt(0)
5113; GFX6-NEXT:    buffer_wbinvl1
5114; GFX6-NEXT:    s_endpgm
5115;
5116; GFX7-LABEL: global_system_acq_rel_acquire_cmpxchg:
5117; GFX7:       ; %bb.0: ; %entry
5118; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
5119; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
5120; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
5121; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
5122; GFX7-NEXT:    s_mov_b64 s[10:11], 16
5123; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5124; GFX7-NEXT:    s_mov_b32 s4, s8
5125; GFX7-NEXT:    s_mov_b32 s5, s9
5126; GFX7-NEXT:    s_mov_b32 s9, s10
5127; GFX7-NEXT:    s_mov_b32 s8, s11
5128; GFX7-NEXT:    s_add_u32 s4, s4, s9
5129; GFX7-NEXT:    s_addc_u32 s8, s5, s8
5130; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
5131; GFX7-NEXT:    s_mov_b32 s5, s8
5132; GFX7-NEXT:    v_mov_b32_e32 v2, s7
5133; GFX7-NEXT:    v_mov_b32_e32 v0, s6
5134; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5135; GFX7-NEXT:    v_mov_b32_e32 v3, v0
5136; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5137; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5138; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5139; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
5140; GFX7-NEXT:    s_waitcnt vmcnt(0)
5141; GFX7-NEXT:    buffer_wbinvl1_vol
5142; GFX7-NEXT:    s_endpgm
5143;
5144; GFX10-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg:
5145; GFX10-WGP:       ; %bb.0: ; %entry
5146; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
5147; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5148; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
5149; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
5150; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5151; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
5152; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
5153; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5154; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
5155; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5156; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5157; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5158; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5159; GFX10-WGP-NEXT:    buffer_gl1_inv
5160; GFX10-WGP-NEXT:    buffer_gl0_inv
5161; GFX10-WGP-NEXT:    s_endpgm
5162;
5163; GFX10-CU-LABEL: global_system_acq_rel_acquire_cmpxchg:
5164; GFX10-CU:       ; %bb.0: ; %entry
5165; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
5166; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5167; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
5168; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
5169; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
5170; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
5171; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
5172; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5173; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
5174; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5175; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5176; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5177; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5178; GFX10-CU-NEXT:    buffer_gl1_inv
5179; GFX10-CU-NEXT:    buffer_gl0_inv
5180; GFX10-CU-NEXT:    s_endpgm
5181;
5182; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_cmpxchg:
5183; SKIP-CACHE-INV:       ; %bb.0: ; %entry
5184; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
5185; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
5186; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
5187; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
5188; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
5189; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
5190; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
5191; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
5192; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
5193; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
5194; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
5195; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
5196; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
5197; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
5198; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
5199; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5200; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
5201; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5202; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5203; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
5204; SKIP-CACHE-INV-NEXT:    s_endpgm
5205;
5206; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg:
5207; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
5208; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5209; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5210; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5211; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5212; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5213; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5214; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5215; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5216; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5217; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
5218; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5219; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5220; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5221; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
5222; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
5223; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
5224;
5225; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg:
5226; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
5227; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5228; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5229; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5230; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5231; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5232; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5233; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5234; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5235; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5236; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
5237; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5238; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5239; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5240; GFX90A-TGSPLIT-NEXT:    buffer_invl2
5241; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
5242; GFX90A-TGSPLIT-NEXT:    s_endpgm
5243;
5244; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg:
5245; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
5246; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5247; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5248; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5249; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5250; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5251; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5252; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5253; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5254; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5255; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
5256; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5257; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
5258; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5259; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
5260; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
5261;
5262; GFX940-TGSPLIT-LABEL: global_system_acq_rel_acquire_cmpxchg:
5263; GFX940-TGSPLIT:       ; %bb.0: ; %entry
5264; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5265; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5266; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5267; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5268; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5269; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5270; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5271; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5272; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5273; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
5274; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5275; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
5276; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5277; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
5278; GFX940-TGSPLIT-NEXT:    s_endpgm
5279;
5280; GFX11-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg:
5281; GFX11-WGP:       ; %bb.0: ; %entry
5282; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
5283; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5284; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5285; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5286; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5287; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
5288; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
5289; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5290; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
5291; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5292; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5293; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5294; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5295; GFX11-WGP-NEXT:    buffer_gl1_inv
5296; GFX11-WGP-NEXT:    buffer_gl0_inv
5297; GFX11-WGP-NEXT:    s_endpgm
5298;
5299; GFX11-CU-LABEL: global_system_acq_rel_acquire_cmpxchg:
5300; GFX11-CU:       ; %bb.0: ; %entry
5301; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
5302; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5303; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5304; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5305; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
5306; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
5307; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
5308; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5309; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
5310; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5311; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5312; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5313; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5314; GFX11-CU-NEXT:    buffer_gl1_inv
5315; GFX11-CU-NEXT:    buffer_gl0_inv
5316; GFX11-CU-NEXT:    s_endpgm
5317;
5318; GFX12-WGP-LABEL: global_system_acq_rel_acquire_cmpxchg:
5319; GFX12-WGP:       ; %bb.0: ; %entry
5320; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
5321; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5322; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5323; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5324; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
5325; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
5326; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
5327; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5328; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
5329; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
5330; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
5331; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
5332; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5333; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
5334; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
5335; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5336; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
5337; GFX12-WGP-NEXT:    s_endpgm
5338;
5339; GFX12-CU-LABEL: global_system_acq_rel_acquire_cmpxchg:
5340; GFX12-CU:       ; %bb.0: ; %entry
5341; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
5342; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5343; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5344; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5345; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
5346; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
5347; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
5348; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5349; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
5350; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
5351; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
5352; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
5353; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5354; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
5355; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
5356; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5357; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
5358; GFX12-CU-NEXT:    s_endpgm
5359    ptr addrspace(1) %out, i32 %in, i32 %old) {
5360entry:
5361  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
5362  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel acquire
5363  ret void
5364}
5365
; Kernel under test: a volatile cmpxchg on a global (addrspace(1)) pointer with
; success ordering seq_cst and failure ordering acquire. The instruction has no
; syncscope operand; the GFX12 expectations below carry scope:SCOPE_SYS.
; The cmpxchg result (%val) is never used and the kernel returns void.
; The per-target assertion lines inside this definition are autogenerated (see
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5366define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
5367; GFX6-LABEL: global_system_seq_cst_acquire_cmpxchg:
5368; GFX6:       ; %bb.0: ; %entry
5369; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
5370; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5371; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
5372; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
5373; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5374; GFX6-NEXT:    s_mov_b32 s12, s5
5375; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
5376; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
5377; GFX6-NEXT:    s_mov_b32 s11, -1
5378; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
5379; GFX6-NEXT:    s_mov_b32 s5, s12
5380; GFX6-NEXT:    s_mov_b32 s6, s11
5381; GFX6-NEXT:    s_mov_b32 s7, s10
5382; GFX6-NEXT:    v_mov_b32_e32 v0, s9
5383; GFX6-NEXT:    v_mov_b32_e32 v2, s8
5384; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5385; GFX6-NEXT:    v_mov_b32_e32 v1, v2
5386; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5387; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5388; GFX6-NEXT:    s_waitcnt vmcnt(0)
5389; GFX6-NEXT:    buffer_wbinvl1
5390; GFX6-NEXT:    s_endpgm
5391;
5392; GFX7-LABEL: global_system_seq_cst_acquire_cmpxchg:
5393; GFX7:       ; %bb.0: ; %entry
5394; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
5395; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
5396; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
5397; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
5398; GFX7-NEXT:    s_mov_b64 s[10:11], 16
5399; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5400; GFX7-NEXT:    s_mov_b32 s4, s8
5401; GFX7-NEXT:    s_mov_b32 s5, s9
5402; GFX7-NEXT:    s_mov_b32 s9, s10
5403; GFX7-NEXT:    s_mov_b32 s8, s11
5404; GFX7-NEXT:    s_add_u32 s4, s4, s9
5405; GFX7-NEXT:    s_addc_u32 s8, s5, s8
5406; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
5407; GFX7-NEXT:    s_mov_b32 s5, s8
5408; GFX7-NEXT:    v_mov_b32_e32 v2, s7
5409; GFX7-NEXT:    v_mov_b32_e32 v0, s6
5410; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5411; GFX7-NEXT:    v_mov_b32_e32 v3, v0
5412; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5413; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5414; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5415; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
5416; GFX7-NEXT:    s_waitcnt vmcnt(0)
5417; GFX7-NEXT:    buffer_wbinvl1_vol
5418; GFX7-NEXT:    s_endpgm
5419;
5420; GFX10-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg:
5421; GFX10-WGP:       ; %bb.0: ; %entry
5422; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
5423; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5424; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
5425; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
5426; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5427; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
5428; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
5429; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5430; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
5431; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5432; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5433; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5434; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5435; GFX10-WGP-NEXT:    buffer_gl1_inv
5436; GFX10-WGP-NEXT:    buffer_gl0_inv
5437; GFX10-WGP-NEXT:    s_endpgm
5438;
5439; GFX10-CU-LABEL: global_system_seq_cst_acquire_cmpxchg:
5440; GFX10-CU:       ; %bb.0: ; %entry
5441; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
5442; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5443; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
5444; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
5445; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
5446; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
5447; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
5448; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5449; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
5450; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5451; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5452; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5453; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5454; GFX10-CU-NEXT:    buffer_gl1_inv
5455; GFX10-CU-NEXT:    buffer_gl0_inv
5456; GFX10-CU-NEXT:    s_endpgm
5457;
5458; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_cmpxchg:
5459; SKIP-CACHE-INV:       ; %bb.0: ; %entry
5460; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
5461; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
5462; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
5463; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
5464; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
5465; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
5466; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
5467; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
5468; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
5469; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
5470; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
5471; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
5472; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
5473; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
5474; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
5475; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5476; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
5477; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5478; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5479; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
5480; SKIP-CACHE-INV-NEXT:    s_endpgm
5481;
5482; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg:
5483; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
5484; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5485; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5486; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5487; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5488; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5489; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5490; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5491; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5492; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5493; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
5494; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5495; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5496; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5497; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
5498; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
5499; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
5500;
5501; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg:
5502; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
5503; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5504; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5505; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5506; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5507; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5508; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5509; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5510; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5511; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5512; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
5513; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5514; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5515; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5516; GFX90A-TGSPLIT-NEXT:    buffer_invl2
5517; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
5518; GFX90A-TGSPLIT-NEXT:    s_endpgm
5519;
5520; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg:
5521; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
5522; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5523; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5524; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5525; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5526; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5527; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5528; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5529; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5530; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5531; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
5532; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5533; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
5534; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5535; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
5536; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
5537;
5538; GFX940-TGSPLIT-LABEL: global_system_seq_cst_acquire_cmpxchg:
5539; GFX940-TGSPLIT:       ; %bb.0: ; %entry
5540; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5541; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5542; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5543; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5544; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5545; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5546; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5547; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5548; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5549; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
5550; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5551; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
5552; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5553; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
5554; GFX940-TGSPLIT-NEXT:    s_endpgm
5555;
5556; GFX11-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg:
5557; GFX11-WGP:       ; %bb.0: ; %entry
5558; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
5559; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5560; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5561; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5562; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5563; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
5564; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
5565; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5566; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
5567; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5568; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5569; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5570; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5571; GFX11-WGP-NEXT:    buffer_gl1_inv
5572; GFX11-WGP-NEXT:    buffer_gl0_inv
5573; GFX11-WGP-NEXT:    s_endpgm
5574;
5575; GFX11-CU-LABEL: global_system_seq_cst_acquire_cmpxchg:
5576; GFX11-CU:       ; %bb.0: ; %entry
5577; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
5578; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5579; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5580; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5581; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
5582; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
5583; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
5584; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5585; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
5586; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5587; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5588; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5589; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5590; GFX11-CU-NEXT:    buffer_gl1_inv
5591; GFX11-CU-NEXT:    buffer_gl0_inv
5592; GFX11-CU-NEXT:    s_endpgm
5593;
5594; GFX12-WGP-LABEL: global_system_seq_cst_acquire_cmpxchg:
5595; GFX12-WGP:       ; %bb.0: ; %entry
5596; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
5597; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5598; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5599; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5600; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
5601; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
5602; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
5603; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5604; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
5605; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
5606; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
5607; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
5608; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5609; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
5610; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
5611; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5612; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
5613; GFX12-WGP-NEXT:    s_endpgm
5614;
5615; GFX12-CU-LABEL: global_system_seq_cst_acquire_cmpxchg:
5616; GFX12-CU:       ; %bb.0: ; %entry
5617; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
5618; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5619; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5620; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5621; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
5622; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
5623; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
5624; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5625; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
5626; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
5627; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
5628; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
5629; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5630; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
5631; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
5632; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5633; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
5634; GFX12-CU-NEXT:    s_endpgm
    ; Kernel arguments: %out is the global base pointer, %in is the replacement
    ; value and %old is the expected (compare) value passed to the cmpxchg.
5635    ptr addrspace(1) %out, i32 %in, i32 %old) {
5636entry:
  ; Address of element 4 of the i32 array at %out; the expectations above show
  ; this folded as a 16-byte offset (or an explicit 64-bit add of 16 on gfx700).
5637  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare %old against memory and store %in on a match; seq_cst on success,
  ; acquire on failure. The result pair %val is intentionally left unused.
5638  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst acquire
5639  ret void
5640}
5641
; Kernel under test: a volatile cmpxchg on a global (addrspace(1)) pointer with
; seq_cst as both the success and the failure ordering. The instruction has no
; syncscope operand; the GFX12 expectations below carry scope:SCOPE_SYS.
; The cmpxchg result (%val) is never used and the kernel returns void.
; The per-target assertion lines inside this definition are autogenerated (see
; the first line of this file); regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
5642define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
5643; GFX6-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5644; GFX6:       ; %bb.0: ; %entry
5645; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
5646; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5647; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
5648; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
5649; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5650; GFX6-NEXT:    s_mov_b32 s12, s5
5651; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
5652; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
5653; GFX6-NEXT:    s_mov_b32 s11, -1
5654; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
5655; GFX6-NEXT:    s_mov_b32 s5, s12
5656; GFX6-NEXT:    s_mov_b32 s6, s11
5657; GFX6-NEXT:    s_mov_b32 s7, s10
5658; GFX6-NEXT:    v_mov_b32_e32 v0, s9
5659; GFX6-NEXT:    v_mov_b32_e32 v2, s8
5660; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5661; GFX6-NEXT:    v_mov_b32_e32 v1, v2
5662; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5663; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
5664; GFX6-NEXT:    s_waitcnt vmcnt(0)
5665; GFX6-NEXT:    buffer_wbinvl1
5666; GFX6-NEXT:    s_endpgm
5667;
5668; GFX7-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5669; GFX7:       ; %bb.0: ; %entry
5670; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
5671; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
5672; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
5673; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
5674; GFX7-NEXT:    s_mov_b64 s[10:11], 16
5675; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5676; GFX7-NEXT:    s_mov_b32 s4, s8
5677; GFX7-NEXT:    s_mov_b32 s5, s9
5678; GFX7-NEXT:    s_mov_b32 s9, s10
5679; GFX7-NEXT:    s_mov_b32 s8, s11
5680; GFX7-NEXT:    s_add_u32 s4, s4, s9
5681; GFX7-NEXT:    s_addc_u32 s8, s5, s8
5682; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
5683; GFX7-NEXT:    s_mov_b32 s5, s8
5684; GFX7-NEXT:    v_mov_b32_e32 v2, s7
5685; GFX7-NEXT:    v_mov_b32_e32 v0, s6
5686; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5687; GFX7-NEXT:    v_mov_b32_e32 v3, v0
5688; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5689; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5690; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5691; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
5692; GFX7-NEXT:    s_waitcnt vmcnt(0)
5693; GFX7-NEXT:    buffer_wbinvl1_vol
5694; GFX7-NEXT:    s_endpgm
5695;
5696; GFX10-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5697; GFX10-WGP:       ; %bb.0: ; %entry
5698; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
5699; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5700; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
5701; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
5702; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5703; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
5704; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
5705; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5706; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
5707; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5708; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5709; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5710; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5711; GFX10-WGP-NEXT:    buffer_gl1_inv
5712; GFX10-WGP-NEXT:    buffer_gl0_inv
5713; GFX10-WGP-NEXT:    s_endpgm
5714;
5715; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5716; GFX10-CU:       ; %bb.0: ; %entry
5717; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
5718; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5719; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
5720; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
5721; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
5722; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
5723; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
5724; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5725; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
5726; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5727; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5728; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
5729; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5730; GFX10-CU-NEXT:    buffer_gl1_inv
5731; GFX10-CU-NEXT:    buffer_gl0_inv
5732; GFX10-CU-NEXT:    s_endpgm
5733;
5734; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5735; SKIP-CACHE-INV:       ; %bb.0: ; %entry
5736; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
5737; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
5738; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
5739; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
5740; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
5741; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
5742; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
5743; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
5744; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
5745; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
5746; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
5747; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
5748; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
5749; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
5750; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
5751; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5752; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
5753; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5754; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
5755; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
5756; SKIP-CACHE-INV-NEXT:    s_endpgm
5757;
5758; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5759; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
5760; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5761; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5762; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5763; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5764; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5765; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5766; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5767; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5768; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5769; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
5770; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5771; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5772; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5773; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
5774; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
5775; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
5776;
5777; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5778; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
5779; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5780; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5781; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
5782; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
5783; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5784; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
5785; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
5786; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5787; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5788; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
5789; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5790; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
5791; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5792; GFX90A-TGSPLIT-NEXT:    buffer_invl2
5793; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
5794; GFX90A-TGSPLIT-NEXT:    s_endpgm
5795;
5796; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5797; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
5798; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5799; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5800; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5801; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5802; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5803; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5804; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5805; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5806; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5807; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
5808; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5809; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
5810; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5811; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
5812; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
5813;
5814; GFX940-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5815; GFX940-TGSPLIT:       ; %bb.0: ; %entry
5816; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
5817; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
5818; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
5819; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
5820; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
5821; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
5822; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
5823; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5824; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
5825; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
5826; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5827; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
5828; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
5829; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
5830; GFX940-TGSPLIT-NEXT:    s_endpgm
5831;
5832; GFX11-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5833; GFX11-WGP:       ; %bb.0: ; %entry
5834; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
5835; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5836; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5837; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5838; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5839; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
5840; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
5841; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5842; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
5843; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5844; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5845; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5846; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
5847; GFX11-WGP-NEXT:    buffer_gl1_inv
5848; GFX11-WGP-NEXT:    buffer_gl0_inv
5849; GFX11-WGP-NEXT:    s_endpgm
5850;
5851; GFX11-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5852; GFX11-CU:       ; %bb.0: ; %entry
5853; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
5854; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5855; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5856; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5857; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
5858; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
5859; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
5860; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5861; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
5862; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5863; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5864; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
5865; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
5866; GFX11-CU-NEXT:    buffer_gl1_inv
5867; GFX11-CU-NEXT:    buffer_gl0_inv
5868; GFX11-CU-NEXT:    s_endpgm
5869;
5870; GFX12-WGP-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5871; GFX12-WGP:       ; %bb.0: ; %entry
5872; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
5873; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5874; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
5875; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
5876; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
5877; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
5878; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
5879; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5880; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
5881; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
5882; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
5883; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
5884; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5885; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
5886; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
5887; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
5888; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
5889; GFX12-WGP-NEXT:    s_endpgm
5890;
5891; GFX12-CU-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
5892; GFX12-CU:       ; %bb.0: ; %entry
5893; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
5894; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
5895; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
5896; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
5897; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
5898; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
5899; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
5900; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5901; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
5902; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
5903; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
5904; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
5905; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5906; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
5907; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
5908; GFX12-CU-NEXT:    s_wait_storecnt 0x0
5909; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
5910; GFX12-CU-NEXT:    s_endpgm
    ; Kernel arguments: %out is the global base pointer, %in is the replacement
    ; value and %old is the expected (compare) value passed to the cmpxchg.
5911    ptr addrspace(1) %out, i32 %in, i32 %old) {
5912entry:
  ; Address of element 4 of the i32 array at %out; the expectations above show
  ; this folded as a 16-byte offset (or an explicit 64-bit add of 16 on gfx700).
5913  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare %old against memory and store %in on a match; seq_cst on both the
  ; success and the failure path. The result pair %val is intentionally unused.
5914  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
5915  ret void
5916}
5917
; Monotonic/monotonic volatile cmpxchg on a global (addrspace(1)) pointer with
; no syncscope attached, whose loaded value is then stored back to %out.
; For every run line the expected code is the returning atomic, a wait between
; the atomic and the store, and the store itself; no cache invalidate or
; write-back instruction is expected anywhere in this function.
; The check lines are autogenerated (see the note at the top of this file):
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
5918define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg(
5919; GFX6-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
5920; GFX6:       ; %bb.0: ; %entry
5921; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
5922; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5923; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
5924; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
5925; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
5926; GFX6-NEXT:    s_mov_b32 s12, s5
5927; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
5928; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
5929; GFX6-NEXT:    s_mov_b32 s11, -1
5930; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
5931; GFX6-NEXT:    s_mov_b32 s5, s12
5932; GFX6-NEXT:    s_mov_b32 s6, s11
5933; GFX6-NEXT:    s_mov_b32 s7, s10
5934; GFX6-NEXT:    v_mov_b32_e32 v0, s9
5935; GFX6-NEXT:    v_mov_b32_e32 v2, s8
5936; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
5937; GFX6-NEXT:    v_mov_b32_e32 v1, v2
5938; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
5939; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
5940; GFX6-NEXT:    s_waitcnt vmcnt(0)
5941; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
5942; GFX6-NEXT:    s_endpgm
5943;
5944; GFX7-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
5945; GFX7:       ; %bb.0: ; %entry
5946; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
5947; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
5948; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
5949; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
5950; GFX7-NEXT:    s_mov_b64 s[12:13], 16
5951; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5952; GFX7-NEXT:    s_mov_b32 s6, s4
5953; GFX7-NEXT:    s_mov_b32 s7, s5
5954; GFX7-NEXT:    s_mov_b32 s11, s12
5955; GFX7-NEXT:    s_mov_b32 s10, s13
5956; GFX7-NEXT:    s_add_u32 s6, s6, s11
5957; GFX7-NEXT:    s_addc_u32 s10, s7, s10
5958; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
5959; GFX7-NEXT:    s_mov_b32 s7, s10
5960; GFX7-NEXT:    v_mov_b32_e32 v2, s9
5961; GFX7-NEXT:    v_mov_b32_e32 v0, s8
5962; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
5963; GFX7-NEXT:    v_mov_b32_e32 v3, v0
5964; GFX7-NEXT:    v_mov_b32_e32 v0, s6
5965; GFX7-NEXT:    v_mov_b32_e32 v1, s7
5966; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
5967; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5968; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5969; GFX7-NEXT:    s_waitcnt vmcnt(0)
5970; GFX7-NEXT:    flat_store_dword v[0:1], v2
5971; GFX7-NEXT:    s_endpgm
5972;
5973; GFX10-WGP-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
5974; GFX10-WGP:       ; %bb.0: ; %entry
5975; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
5976; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5977; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
5978; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
5979; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
5980; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
5981; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
5982; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5983; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
5984; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
5985; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
5986; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
5987; GFX10-WGP-NEXT:    s_endpgm
5988;
5989; GFX10-CU-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
5990; GFX10-CU:       ; %bb.0: ; %entry
5991; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
5992; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
5993; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
5994; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
5995; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
5996; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
5997; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
5998; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
5999; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
6000; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6001; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
6002; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
6003; GFX10-CU-NEXT:    s_endpgm
6004;
6005; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6006; SKIP-CACHE-INV:       ; %bb.0: ; %entry
6007; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
6008; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
6009; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
6010; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
6011; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
6012; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
6013; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
6014; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
6015; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
6016; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
6017; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
6018; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
6019; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
6020; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
6021; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
6022; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6023; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
6024; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
6025; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
6026; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6027; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
6028; SKIP-CACHE-INV-NEXT:    s_endpgm
6029;
6030; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6031; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
6032; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6033; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6034; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6035; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6036; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6037; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6038; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6039; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6040; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6041; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
6042; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6043; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
6044; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
6045;
6046; GFX90A-TGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6047; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
6048; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6049; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6050; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6051; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6052; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6053; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6054; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6055; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6056; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6057; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
6058; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6059; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
6060; GFX90A-TGSPLIT-NEXT:    s_endpgm
6061;
6062; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6063; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
6064; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6065; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6066; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6067; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6068; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6069; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6070; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6071; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6072; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6073; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
6074; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6075; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
6076; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
6077;
6078; GFX940-TGSPLIT-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6079; GFX940-TGSPLIT:       ; %bb.0: ; %entry
6080; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6081; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6082; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6083; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6084; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6085; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6086; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6087; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6088; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6089; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
6090; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6091; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
6092; GFX940-TGSPLIT-NEXT:    s_endpgm
6093;
6094; GFX11-WGP-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6095; GFX11-WGP:       ; %bb.0: ; %entry
6096; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
6097; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6098; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6099; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6100; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6101; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
6102; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
6103; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6104; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
6105; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6106; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
6107; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
6108; GFX11-WGP-NEXT:    s_endpgm
6109;
6110; GFX11-CU-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6111; GFX11-CU:       ; %bb.0: ; %entry
6112; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
6113; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6114; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6115; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6116; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
6117; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
6118; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
6119; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6120; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
6121; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6122; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
6123; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
6124; GFX11-CU-NEXT:    s_endpgm
6125;
6126; GFX12-WGP-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6127; GFX12-WGP:       ; %bb.0: ; %entry
6128; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
6129; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6130; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6131; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6132; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
6133; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
6134; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
6135; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6136; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
6137; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
6138; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
6139; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
6140; GFX12-WGP-NEXT:    s_endpgm
6141;
6142; GFX12-CU-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
6143; GFX12-CU:       ; %bb.0: ; %entry
6144; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
6145; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6146; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6147; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6148; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
6149; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
6150; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
6151; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6152; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
6153; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
6154; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
6155; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
6156; GFX12-CU-NEXT:    s_endpgm
6157    ptr addrspace(1) %out, i32 %in, i32 %old) {
6158entry:
  ; Element 4 of an i32 array, i.e. the byte offset 16 that appears in the
  ; expected instructions above.
6159  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old, write %in on a match; both orderings are monotonic.
6160  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic monotonic
  ; Field 0 of the { i32, i1 } result is the value loaded from memory; using it
  ; in the store below is what makes this the "ret" (returning) variant.
6161  %val0 = extractvalue { i32, i1 } %val, 0
6162  store i32 %val0, ptr addrspace(1) %out, align 4
6163  ret void
6164}
6165
; Acquire (success) / monotonic (failure) volatile cmpxchg on a global
; (addrspace(1)) pointer with no syncscope attached, whose loaded value is then
; stored back to %out.
; In the expected code the returning atomic is followed by a wait and then a
; cache invalidate before the store. The invalidate is buffer_wbinvl1,
; buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv,
; buffer_invl2 + buffer_wbinvl1_vol, buffer_inv sc0 sc1, or
; global_inv scope:SCOPE_SYS, depending on the target.
; The run that passes -amdgcn-skip-cache-invalidations expects the wait but no
; invalidate instruction.
; The check lines are autogenerated (see the note at the top of this file):
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6166define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
6167; GFX6-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6168; GFX6:       ; %bb.0: ; %entry
6169; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
6170; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6171; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
6172; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
6173; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6174; GFX6-NEXT:    s_mov_b32 s12, s5
6175; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
6176; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
6177; GFX6-NEXT:    s_mov_b32 s11, -1
6178; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
6179; GFX6-NEXT:    s_mov_b32 s5, s12
6180; GFX6-NEXT:    s_mov_b32 s6, s11
6181; GFX6-NEXT:    s_mov_b32 s7, s10
6182; GFX6-NEXT:    v_mov_b32_e32 v0, s9
6183; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6184; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6185; GFX6-NEXT:    v_mov_b32_e32 v1, v2
6186; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
6187; GFX6-NEXT:    s_waitcnt vmcnt(0)
6188; GFX6-NEXT:    buffer_wbinvl1
6189; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
6190; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
6191; GFX6-NEXT:    s_endpgm
6192;
6193; GFX7-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6194; GFX7:       ; %bb.0: ; %entry
6195; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
6196; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6197; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
6198; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
6199; GFX7-NEXT:    s_mov_b64 s[12:13], 16
6200; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
6201; GFX7-NEXT:    s_mov_b32 s6, s4
6202; GFX7-NEXT:    s_mov_b32 s7, s5
6203; GFX7-NEXT:    s_mov_b32 s11, s12
6204; GFX7-NEXT:    s_mov_b32 s10, s13
6205; GFX7-NEXT:    s_add_u32 s6, s6, s11
6206; GFX7-NEXT:    s_addc_u32 s10, s7, s10
6207; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
6208; GFX7-NEXT:    s_mov_b32 s7, s10
6209; GFX7-NEXT:    v_mov_b32_e32 v2, s9
6210; GFX7-NEXT:    v_mov_b32_e32 v0, s8
6211; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6212; GFX7-NEXT:    v_mov_b32_e32 v3, v0
6213; GFX7-NEXT:    v_mov_b32_e32 v0, s6
6214; GFX7-NEXT:    v_mov_b32_e32 v1, s7
6215; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
6216; GFX7-NEXT:    s_waitcnt vmcnt(0)
6217; GFX7-NEXT:    buffer_wbinvl1_vol
6218; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6219; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6220; GFX7-NEXT:    flat_store_dword v[0:1], v2
6221; GFX7-NEXT:    s_endpgm
6222;
6223; GFX10-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6224; GFX10-WGP:       ; %bb.0: ; %entry
6225; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
6226; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6227; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
6228; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
6229; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6230; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
6231; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
6232; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6233; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
6234; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6235; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
6236; GFX10-WGP-NEXT:    buffer_gl1_inv
6237; GFX10-WGP-NEXT:    buffer_gl0_inv
6238; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
6239; GFX10-WGP-NEXT:    s_endpgm
6240;
6241; GFX10-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6242; GFX10-CU:       ; %bb.0: ; %entry
6243; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
6244; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6245; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
6246; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
6247; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
6248; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
6249; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
6250; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6251; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
6252; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6253; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
6254; GFX10-CU-NEXT:    buffer_gl1_inv
6255; GFX10-CU-NEXT:    buffer_gl0_inv
6256; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
6257; GFX10-CU-NEXT:    s_endpgm
6258;
6259; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6260; SKIP-CACHE-INV:       ; %bb.0: ; %entry
6261; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
6262; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
6263; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
6264; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
6265; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
6266; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
6267; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
6268; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
6269; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
6270; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
6271; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
6272; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
6273; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
6274; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
6275; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
6276; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6277; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
6278; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
6279; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6280; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
6281; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6282; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
6283; SKIP-CACHE-INV-NEXT:    s_endpgm
6284;
6285; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6286; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
6287; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6288; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6289; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6290; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6291; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6292; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6293; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6294; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6295; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6296; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
6297; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6298; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
6299; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
6300; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
6301; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
6302;
6303; GFX90A-TGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6304; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
6305; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6306; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6307; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6308; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6309; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6310; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6311; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6312; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6313; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6314; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
6315; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6316; GFX90A-TGSPLIT-NEXT:    buffer_invl2
6317; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
6318; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
6319; GFX90A-TGSPLIT-NEXT:    s_endpgm
6320;
6321; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6322; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
6323; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6324; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6325; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6326; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6327; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6328; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6329; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6330; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6331; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6332; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
6333; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6334; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
6335; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
6336; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
6337;
6338; GFX940-TGSPLIT-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6339; GFX940-TGSPLIT:       ; %bb.0: ; %entry
6340; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6341; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6342; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6343; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6344; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6345; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6346; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6347; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6348; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6349; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
6350; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6351; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
6352; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
6353; GFX940-TGSPLIT-NEXT:    s_endpgm
6354;
6355; GFX11-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6356; GFX11-WGP:       ; %bb.0: ; %entry
6357; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
6358; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6359; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6360; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6361; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6362; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
6363; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
6364; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6365; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
6366; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6367; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
6368; GFX11-WGP-NEXT:    buffer_gl1_inv
6369; GFX11-WGP-NEXT:    buffer_gl0_inv
6370; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
6371; GFX11-WGP-NEXT:    s_endpgm
6372;
6373; GFX11-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6374; GFX11-CU:       ; %bb.0: ; %entry
6375; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
6376; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6377; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6378; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6379; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
6380; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
6381; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
6382; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6383; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
6384; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6385; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
6386; GFX11-CU-NEXT:    buffer_gl1_inv
6387; GFX11-CU-NEXT:    buffer_gl0_inv
6388; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
6389; GFX11-CU-NEXT:    s_endpgm
6390;
6391; GFX12-WGP-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6392; GFX12-WGP:       ; %bb.0: ; %entry
6393; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
6394; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6395; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6396; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6397; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
6398; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
6399; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
6400; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6401; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
6402; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
6403; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
6404; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
6405; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
6406; GFX12-WGP-NEXT:    s_endpgm
6407;
6408; GFX12-CU-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
6409; GFX12-CU:       ; %bb.0: ; %entry
6410; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
6411; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6412; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6413; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6414; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
6415; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
6416; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
6417; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6418; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
6419; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
6420; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
6421; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
6422; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
6423; GFX12-CU-NEXT:    s_endpgm
6424    ptr addrspace(1) %out, i32 %in, i32 %old) {
6425entry:
  ; Element 4 of an i32 array, i.e. the byte offset 16 that appears in the
  ; expected instructions above.
6426  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old, write %in on a match; acquire ordering on success,
  ; monotonic on failure.
6427  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire monotonic
  ; Field 0 of the { i32, i1 } result is the value loaded from memory; using it
  ; in the store below is what makes this the "ret" (returning) variant.
6428  %val0 = extractvalue { i32, i1 } %val, 0
6429  store i32 %val0, ptr addrspace(1) %out, align 4
6430  ret void
6431}
6432
6433define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
6434; GFX6-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6435; GFX6:       ; %bb.0: ; %entry
6436; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
6437; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6438; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
6439; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
6440; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6441; GFX6-NEXT:    s_mov_b32 s12, s5
6442; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
6443; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
6444; GFX6-NEXT:    s_mov_b32 s11, -1
6445; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
6446; GFX6-NEXT:    s_mov_b32 s5, s12
6447; GFX6-NEXT:    s_mov_b32 s6, s11
6448; GFX6-NEXT:    s_mov_b32 s7, s10
6449; GFX6-NEXT:    v_mov_b32_e32 v0, s9
6450; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6451; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6452; GFX6-NEXT:    v_mov_b32_e32 v1, v2
6453; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6454; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
6455; GFX6-NEXT:    s_waitcnt vmcnt(0)
6456; GFX6-NEXT:    buffer_wbinvl1
6457; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
6458; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
6459; GFX6-NEXT:    s_endpgm
6460;
6461; GFX7-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6462; GFX7:       ; %bb.0: ; %entry
6463; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
6464; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6465; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
6466; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
6467; GFX7-NEXT:    s_mov_b64 s[12:13], 16
6468; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
6469; GFX7-NEXT:    s_mov_b32 s6, s4
6470; GFX7-NEXT:    s_mov_b32 s7, s5
6471; GFX7-NEXT:    s_mov_b32 s11, s12
6472; GFX7-NEXT:    s_mov_b32 s10, s13
6473; GFX7-NEXT:    s_add_u32 s6, s6, s11
6474; GFX7-NEXT:    s_addc_u32 s10, s7, s10
6475; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
6476; GFX7-NEXT:    s_mov_b32 s7, s10
6477; GFX7-NEXT:    v_mov_b32_e32 v2, s9
6478; GFX7-NEXT:    v_mov_b32_e32 v0, s8
6479; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6480; GFX7-NEXT:    v_mov_b32_e32 v3, v0
6481; GFX7-NEXT:    v_mov_b32_e32 v0, s6
6482; GFX7-NEXT:    v_mov_b32_e32 v1, s7
6483; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6484; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
6485; GFX7-NEXT:    s_waitcnt vmcnt(0)
6486; GFX7-NEXT:    buffer_wbinvl1_vol
6487; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6488; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6489; GFX7-NEXT:    flat_store_dword v[0:1], v2
6490; GFX7-NEXT:    s_endpgm
6491;
6492; GFX10-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6493; GFX10-WGP:       ; %bb.0: ; %entry
6494; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
6495; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6496; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
6497; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
6498; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6499; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
6500; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
6501; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6502; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
6503; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6504; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6505; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6506; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
6507; GFX10-WGP-NEXT:    buffer_gl1_inv
6508; GFX10-WGP-NEXT:    buffer_gl0_inv
6509; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
6510; GFX10-WGP-NEXT:    s_endpgm
6511;
6512; GFX10-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6513; GFX10-CU:       ; %bb.0: ; %entry
6514; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
6515; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6516; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
6517; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
6518; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
6519; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
6520; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
6521; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6522; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
6523; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6524; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6525; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6526; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
6527; GFX10-CU-NEXT:    buffer_gl1_inv
6528; GFX10-CU-NEXT:    buffer_gl0_inv
6529; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
6530; GFX10-CU-NEXT:    s_endpgm
6531;
6532; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6533; SKIP-CACHE-INV:       ; %bb.0: ; %entry
6534; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
6535; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
6536; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
6537; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
6538; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
6539; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
6540; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
6541; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
6542; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
6543; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
6544; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
6545; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
6546; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
6547; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
6548; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
6549; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6550; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
6551; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6552; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
6553; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6554; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
6555; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6556; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
6557; SKIP-CACHE-INV-NEXT:    s_endpgm
6558;
6559; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6560; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
6561; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6562; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6563; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6564; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6565; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6566; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6567; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6568; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6569; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6570; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
6571; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6572; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
6573; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6574; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
6575; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
6576; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
6577; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
6578;
6579; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6580; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
6581; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6582; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6583; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6584; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6585; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6586; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6587; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6588; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6589; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6590; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
6591; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6592; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
6593; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6594; GFX90A-TGSPLIT-NEXT:    buffer_invl2
6595; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
6596; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
6597; GFX90A-TGSPLIT-NEXT:    s_endpgm
6598;
6599; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6600; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
6601; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6602; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6603; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6604; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6605; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6606; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6607; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6608; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6609; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6610; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
6611; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6612; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
6613; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6614; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
6615; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
6616; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
6617;
6618; GFX940-TGSPLIT-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6619; GFX940-TGSPLIT:       ; %bb.0: ; %entry
6620; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6621; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6622; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6623; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6624; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6625; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6626; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6627; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6628; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6629; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
6630; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6631; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
6632; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6633; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
6634; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
6635; GFX940-TGSPLIT-NEXT:    s_endpgm
6636;
6637; GFX11-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6638; GFX11-WGP:       ; %bb.0: ; %entry
6639; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
6640; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6641; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6642; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6643; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6644; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
6645; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
6646; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6647; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
6648; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6649; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6650; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6651; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
6652; GFX11-WGP-NEXT:    buffer_gl1_inv
6653; GFX11-WGP-NEXT:    buffer_gl0_inv
6654; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
6655; GFX11-WGP-NEXT:    s_endpgm
6656;
6657; GFX11-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6658; GFX11-CU:       ; %bb.0: ; %entry
6659; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
6660; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6661; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6662; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6663; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
6664; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
6665; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
6666; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6667; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
6668; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6669; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6670; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6671; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
6672; GFX11-CU-NEXT:    buffer_gl1_inv
6673; GFX11-CU-NEXT:    buffer_gl0_inv
6674; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
6675; GFX11-CU-NEXT:    s_endpgm
6676;
6677; GFX12-WGP-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6678; GFX12-WGP:       ; %bb.0: ; %entry
6679; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
6680; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6681; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6682; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6683; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
6684; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
6685; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
6686; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6687; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
6688; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
6689; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
6690; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
6691; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6692; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
6693; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
6694; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
6695; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
6696; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
6697; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
6698; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
6699; GFX12-WGP-NEXT:    s_endpgm
6700;
6701; GFX12-CU-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
6702; GFX12-CU:       ; %bb.0: ; %entry
6703; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
6704; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6705; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6706; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6707; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
6708; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
6709; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
6710; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6711; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
6712; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
6713; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
6714; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
6715; GFX12-CU-NEXT:    s_wait_storecnt 0x0
6716; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
6717; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
6718; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
6719; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
6720; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
6721; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
6722; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
6723; GFX12-CU-NEXT:    s_endpgm
6724    ptr addrspace(1) %out, i32 %in, i32 %old) {
6725entry:
6726  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
6727  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel monotonic
6728  %val0 = extractvalue { i32, i1 } %val, 0
6729  store i32 %val0, ptr addrspace(1) %out, align 4
6730  ret void
6731}
6732
; Returning cmpxchg on global memory (addrspace(1)) with success ordering
; seq_cst and failure ordering monotonic. The instruction carries no syncscope;
; the kernel name labels this as the system-scope case. The i32 result of the
; cmpxchg is consumed by a store to %out.
;
; The per-target assertion lines that sit between the opening parenthesis of
; the signature and its parameter list are autogenerated (see the first line
; of this file). Regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
6733define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
6734; GFX6-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6735; GFX6:       ; %bb.0: ; %entry
6736; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
6737; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6738; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
6739; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
6740; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
6741; GFX6-NEXT:    s_mov_b32 s12, s5
6742; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
6743; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
6744; GFX6-NEXT:    s_mov_b32 s11, -1
6745; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
6746; GFX6-NEXT:    s_mov_b32 s5, s12
6747; GFX6-NEXT:    s_mov_b32 s6, s11
6748; GFX6-NEXT:    s_mov_b32 s7, s10
6749; GFX6-NEXT:    v_mov_b32_e32 v0, s9
6750; GFX6-NEXT:    v_mov_b32_e32 v2, s8
6751; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6752; GFX6-NEXT:    v_mov_b32_e32 v1, v2
6753; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6754; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
6755; GFX6-NEXT:    s_waitcnt vmcnt(0)
6756; GFX6-NEXT:    buffer_wbinvl1
6757; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
6758; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
6759; GFX6-NEXT:    s_endpgm
6760;
6761; GFX7-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6762; GFX7:       ; %bb.0: ; %entry
6763; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
6764; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
6765; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
6766; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
6767; GFX7-NEXT:    s_mov_b64 s[12:13], 16
6768; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
6769; GFX7-NEXT:    s_mov_b32 s6, s4
6770; GFX7-NEXT:    s_mov_b32 s7, s5
6771; GFX7-NEXT:    s_mov_b32 s11, s12
6772; GFX7-NEXT:    s_mov_b32 s10, s13
6773; GFX7-NEXT:    s_add_u32 s6, s6, s11
6774; GFX7-NEXT:    s_addc_u32 s10, s7, s10
6775; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
6776; GFX7-NEXT:    s_mov_b32 s7, s10
6777; GFX7-NEXT:    v_mov_b32_e32 v2, s9
6778; GFX7-NEXT:    v_mov_b32_e32 v0, s8
6779; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6780; GFX7-NEXT:    v_mov_b32_e32 v3, v0
6781; GFX7-NEXT:    v_mov_b32_e32 v0, s6
6782; GFX7-NEXT:    v_mov_b32_e32 v1, s7
6783; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6784; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
6785; GFX7-NEXT:    s_waitcnt vmcnt(0)
6786; GFX7-NEXT:    buffer_wbinvl1_vol
6787; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6788; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6789; GFX7-NEXT:    flat_store_dword v[0:1], v2
6790; GFX7-NEXT:    s_endpgm
6791;
6792; GFX10-WGP-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6793; GFX10-WGP:       ; %bb.0: ; %entry
6794; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
6795; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6796; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
6797; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
6798; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6799; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
6800; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
6801; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6802; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
6803; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6804; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6805; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6806; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
6807; GFX10-WGP-NEXT:    buffer_gl1_inv
6808; GFX10-WGP-NEXT:    buffer_gl0_inv
6809; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
6810; GFX10-WGP-NEXT:    s_endpgm
6811;
6812; GFX10-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6813; GFX10-CU:       ; %bb.0: ; %entry
6814; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
6815; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6816; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
6817; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
6818; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
6819; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
6820; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
6821; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6822; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
6823; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6824; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6825; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
6826; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
6827; GFX10-CU-NEXT:    buffer_gl1_inv
6828; GFX10-CU-NEXT:    buffer_gl0_inv
6829; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
6830; GFX10-CU-NEXT:    s_endpgm
6831;
6832; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6833; SKIP-CACHE-INV:       ; %bb.0: ; %entry
6834; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
6835; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
6836; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
6837; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
6838; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
6839; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
6840; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
6841; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
6842; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
6843; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
6844; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
6845; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
6846; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
6847; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
6848; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
6849; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
6850; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
6851; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6852; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
6853; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6854; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
6855; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
6856; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
6857; SKIP-CACHE-INV-NEXT:    s_endpgm
6858;
6859; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6860; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
6861; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6862; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6863; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6864; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6865; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6866; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6867; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6868; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6869; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6870; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
6871; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6872; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
6873; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6874; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
6875; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
6876; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
6877; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
6878;
6879; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6880; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
6881; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6882; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
6883; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
6884; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
6885; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6886; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
6887; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
6888; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6889; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6890; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
6891; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6892; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
6893; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6894; GFX90A-TGSPLIT-NEXT:    buffer_invl2
6895; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
6896; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
6897; GFX90A-TGSPLIT-NEXT:    s_endpgm
6898;
6899; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6900; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
6901; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6902; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6903; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6904; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6905; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6906; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6907; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6908; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6909; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6910; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
6911; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6912; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
6913; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6914; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
6915; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
6916; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
6917;
6918; GFX940-TGSPLIT-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6919; GFX940-TGSPLIT:       ; %bb.0: ; %entry
6920; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
6921; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
6922; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
6923; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
6924; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
6925; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
6926; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
6927; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
6928; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
6929; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
6930; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6931; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
6932; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
6933; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
6934; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
6935; GFX940-TGSPLIT-NEXT:    s_endpgm
6936;
6937; GFX11-WGP-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6938; GFX11-WGP:       ; %bb.0: ; %entry
6939; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
6940; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6941; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6942; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6943; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
6944; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
6945; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
6946; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6947; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
6948; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6949; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
6950; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6951; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
6952; GFX11-WGP-NEXT:    buffer_gl1_inv
6953; GFX11-WGP-NEXT:    buffer_gl0_inv
6954; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
6955; GFX11-WGP-NEXT:    s_endpgm
6956;
6957; GFX11-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6958; GFX11-CU:       ; %bb.0: ; %entry
6959; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
6960; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6961; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
6962; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
6963; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
6964; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
6965; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
6966; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6967; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
6968; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6969; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
6970; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
6971; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
6972; GFX11-CU-NEXT:    buffer_gl1_inv
6973; GFX11-CU-NEXT:    buffer_gl0_inv
6974; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
6975; GFX11-CU-NEXT:    s_endpgm
6976;
6977; GFX12-WGP-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
6978; GFX12-WGP:       ; %bb.0: ; %entry
6979; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
6980; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
6981; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
6982; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
6983; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
6984; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
6985; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
6986; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
6987; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
6988; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
6989; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
6990; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
6991; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
6992; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
6993; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
6994; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
6995; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
6996; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
6997; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
6998; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
6999; GFX12-WGP-NEXT:    s_endpgm
7000;
7001; GFX12-CU-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
7002; GFX12-CU:       ; %bb.0: ; %entry
7003; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
7004; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7005; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7006; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7007; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
7008; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
7009; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
7010; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7011; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
7012; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
7013; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
7014; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
7015; GFX12-CU-NEXT:    s_wait_storecnt 0x0
7016; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
7017; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
7018; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
7019; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
7020; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
7021; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
7022; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7023; GFX12-CU-NEXT:    s_endpgm
7024    ptr addrspace(1) %out, i32 %in, i32 %old) {
7025entry:
  ; %gep points 4 i32 elements (16 bytes) past %out; the assertions above show
  ; that displacement as an "offset:16" immediate or as an explicit add of 16.
7026  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Volatile compare-and-swap at %gep: compare against %old, swap in %in.
  ; Success ordering is seq_cst, failure ordering is monotonic.
7027  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst monotonic
  ; Keep only the i32 member (index 0) of the { i32, i1 } result; the i1 member
  ; is not used by this test.
7028  %val0 = extractvalue { i32, i1 } %val, 0
  ; Storing the result to %out gives the cmpxchg's return value a use.
7029  store i32 %val0, ptr addrspace(1) %out, align 4
7030  ret void
7031}
7032
7033define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg(
7034; GFX6-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7035; GFX6:       ; %bb.0: ; %entry
7036; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
7037; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7038; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
7039; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
7040; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7041; GFX6-NEXT:    s_mov_b32 s12, s5
7042; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
7043; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
7044; GFX6-NEXT:    s_mov_b32 s11, -1
7045; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
7046; GFX6-NEXT:    s_mov_b32 s5, s12
7047; GFX6-NEXT:    s_mov_b32 s6, s11
7048; GFX6-NEXT:    s_mov_b32 s7, s10
7049; GFX6-NEXT:    v_mov_b32_e32 v0, s9
7050; GFX6-NEXT:    v_mov_b32_e32 v2, s8
7051; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7052; GFX6-NEXT:    v_mov_b32_e32 v1, v2
7053; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7054; GFX6-NEXT:    s_waitcnt vmcnt(0)
7055; GFX6-NEXT:    buffer_wbinvl1
7056; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7057; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
7058; GFX6-NEXT:    s_endpgm
7059;
7060; GFX7-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7061; GFX7:       ; %bb.0: ; %entry
7062; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
7063; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7064; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
7065; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
7066; GFX7-NEXT:    s_mov_b64 s[12:13], 16
7067; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
7068; GFX7-NEXT:    s_mov_b32 s6, s4
7069; GFX7-NEXT:    s_mov_b32 s7, s5
7070; GFX7-NEXT:    s_mov_b32 s11, s12
7071; GFX7-NEXT:    s_mov_b32 s10, s13
7072; GFX7-NEXT:    s_add_u32 s6, s6, s11
7073; GFX7-NEXT:    s_addc_u32 s10, s7, s10
7074; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
7075; GFX7-NEXT:    s_mov_b32 s7, s10
7076; GFX7-NEXT:    v_mov_b32_e32 v2, s9
7077; GFX7-NEXT:    v_mov_b32_e32 v0, s8
7078; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7079; GFX7-NEXT:    v_mov_b32_e32 v3, v0
7080; GFX7-NEXT:    v_mov_b32_e32 v0, s6
7081; GFX7-NEXT:    v_mov_b32_e32 v1, s7
7082; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7083; GFX7-NEXT:    s_waitcnt vmcnt(0)
7084; GFX7-NEXT:    buffer_wbinvl1_vol
7085; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7086; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7087; GFX7-NEXT:    flat_store_dword v[0:1], v2
7088; GFX7-NEXT:    s_endpgm
7089;
7090; GFX10-WGP-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7091; GFX10-WGP:       ; %bb.0: ; %entry
7092; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
7093; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7094; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
7095; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
7096; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7097; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
7098; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
7099; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7100; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
7101; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7102; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
7103; GFX10-WGP-NEXT:    buffer_gl1_inv
7104; GFX10-WGP-NEXT:    buffer_gl0_inv
7105; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
7106; GFX10-WGP-NEXT:    s_endpgm
7107;
7108; GFX10-CU-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7109; GFX10-CU:       ; %bb.0: ; %entry
7110; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
7111; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7112; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
7113; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
7114; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
7115; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
7116; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
7117; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7118; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
7119; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7120; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
7121; GFX10-CU-NEXT:    buffer_gl1_inv
7122; GFX10-CU-NEXT:    buffer_gl0_inv
7123; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
7124; GFX10-CU-NEXT:    s_endpgm
7125;
7126; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7127; SKIP-CACHE-INV:       ; %bb.0: ; %entry
7128; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
7129; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
7130; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
7131; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
7132; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
7133; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
7134; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
7135; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
7136; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
7137; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
7138; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
7139; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
7140; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
7141; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
7142; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
7143; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7144; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
7145; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7146; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7147; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7148; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7149; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
7150; SKIP-CACHE-INV-NEXT:    s_endpgm
7151;
7152; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7153; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
7154; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7155; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7156; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7157; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7158; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7159; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7160; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7161; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7162; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7163; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7164; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7165; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
7166; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
7167; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7168; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
7169;
7170; GFX90A-TGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7171; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
7172; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7173; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7174; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7175; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7176; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7177; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7178; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7179; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7180; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7181; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7182; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7183; GFX90A-TGSPLIT-NEXT:    buffer_invl2
7184; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
7185; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7186; GFX90A-TGSPLIT-NEXT:    s_endpgm
7187;
7188; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7189; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
7190; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7191; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7192; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7193; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7194; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7195; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7196; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7197; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7198; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7199; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
7200; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7201; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
7202; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7203; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
7204;
7205; GFX940-TGSPLIT-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7206; GFX940-TGSPLIT:       ; %bb.0: ; %entry
7207; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7208; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7209; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7210; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7211; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7212; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7213; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7214; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7215; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7216; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
7217; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7218; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
7219; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7220; GFX940-TGSPLIT-NEXT:    s_endpgm
7221;
7222; GFX11-WGP-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7223; GFX11-WGP:       ; %bb.0: ; %entry
7224; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
7225; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7226; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7227; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7228; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7229; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
7230; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
7231; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7232; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
7233; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7234; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
7235; GFX11-WGP-NEXT:    buffer_gl1_inv
7236; GFX11-WGP-NEXT:    buffer_gl0_inv
7237; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7238; GFX11-WGP-NEXT:    s_endpgm
7239;
7240; GFX11-CU-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7241; GFX11-CU:       ; %bb.0: ; %entry
7242; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
7243; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7244; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7245; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7246; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
7247; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
7248; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
7249; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7250; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
7251; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7252; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
7253; GFX11-CU-NEXT:    buffer_gl1_inv
7254; GFX11-CU-NEXT:    buffer_gl0_inv
7255; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7256; GFX11-CU-NEXT:    s_endpgm
7257;
7258; GFX12-WGP-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7259; GFX12-WGP:       ; %bb.0: ; %entry
7260; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
7261; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7262; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7263; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7264; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
7265; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
7266; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
7267; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7268; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
7269; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
7270; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
7271; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
7272; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
7273; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
7274; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7275; GFX12-WGP-NEXT:    s_endpgm
7276;
7277; GFX12-CU-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
7278; GFX12-CU:       ; %bb.0: ; %entry
7279; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
7280; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7281; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7282; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7283; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
7284; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
7285; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
7286; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7287; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
7288; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
7289; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
7290; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
7291; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
7292; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
7293; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7294; GFX12-CU-NEXT:    s_endpgm
7295    ptr addrspace(1) %out, i32 %in, i32 %old) {
7296entry:
7297  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
7298  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic acquire
7299  %val0 = extractvalue { i32, i1 } %val, 0
7300  store i32 %val0, ptr addrspace(1) %out, align 4
7301  ret void
7302}
7303
; Test kernel: a volatile cmpxchg on a global (addrspace(1)) pointer with
; acquire success ordering and acquire failure ordering, written without an
; explicit syncscope. The i32 member of the cmpxchg result is stored back to
; %out, so the value returned by the atomic is actually used.
;
; The assertion lines between the define line and the parameter list are
; autogenerated (see the NOTE on the first line of the file). Regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
7304define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
7305; GFX6-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7306; GFX6:       ; %bb.0: ; %entry
7307; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
7308; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7309; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
7310; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
7311; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7312; GFX6-NEXT:    s_mov_b32 s12, s5
7313; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
7314; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
7315; GFX6-NEXT:    s_mov_b32 s11, -1
7316; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
7317; GFX6-NEXT:    s_mov_b32 s5, s12
7318; GFX6-NEXT:    s_mov_b32 s6, s11
7319; GFX6-NEXT:    s_mov_b32 s7, s10
7320; GFX6-NEXT:    v_mov_b32_e32 v0, s9
7321; GFX6-NEXT:    v_mov_b32_e32 v2, s8
7322; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7323; GFX6-NEXT:    v_mov_b32_e32 v1, v2
7324; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7325; GFX6-NEXT:    s_waitcnt vmcnt(0)
7326; GFX6-NEXT:    buffer_wbinvl1
7327; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7328; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
7329; GFX6-NEXT:    s_endpgm
7330;
7331; GFX7-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7332; GFX7:       ; %bb.0: ; %entry
7333; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
7334; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7335; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
7336; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
7337; GFX7-NEXT:    s_mov_b64 s[12:13], 16
7338; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
7339; GFX7-NEXT:    s_mov_b32 s6, s4
7340; GFX7-NEXT:    s_mov_b32 s7, s5
7341; GFX7-NEXT:    s_mov_b32 s11, s12
7342; GFX7-NEXT:    s_mov_b32 s10, s13
7343; GFX7-NEXT:    s_add_u32 s6, s6, s11
7344; GFX7-NEXT:    s_addc_u32 s10, s7, s10
7345; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
7346; GFX7-NEXT:    s_mov_b32 s7, s10
7347; GFX7-NEXT:    v_mov_b32_e32 v2, s9
7348; GFX7-NEXT:    v_mov_b32_e32 v0, s8
7349; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7350; GFX7-NEXT:    v_mov_b32_e32 v3, v0
7351; GFX7-NEXT:    v_mov_b32_e32 v0, s6
7352; GFX7-NEXT:    v_mov_b32_e32 v1, s7
7353; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7354; GFX7-NEXT:    s_waitcnt vmcnt(0)
7355; GFX7-NEXT:    buffer_wbinvl1_vol
7356; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7357; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7358; GFX7-NEXT:    flat_store_dword v[0:1], v2
7359; GFX7-NEXT:    s_endpgm
7360;
7361; GFX10-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7362; GFX10-WGP:       ; %bb.0: ; %entry
7363; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
7364; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7365; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
7366; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
7367; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7368; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
7369; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
7370; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7371; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
7372; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7373; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
7374; GFX10-WGP-NEXT:    buffer_gl1_inv
7375; GFX10-WGP-NEXT:    buffer_gl0_inv
7376; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
7377; GFX10-WGP-NEXT:    s_endpgm
7378;
7379; GFX10-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7380; GFX10-CU:       ; %bb.0: ; %entry
7381; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
7382; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7383; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
7384; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
7385; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
7386; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
7387; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
7388; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7389; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
7390; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7391; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
7392; GFX10-CU-NEXT:    buffer_gl1_inv
7393; GFX10-CU-NEXT:    buffer_gl0_inv
7394; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
7395; GFX10-CU-NEXT:    s_endpgm
7396;
7397; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7398; SKIP-CACHE-INV:       ; %bb.0: ; %entry
7399; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
7400; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
7401; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
7402; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
7403; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
7404; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
7405; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
7406; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
7407; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
7408; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
7409; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
7410; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
7411; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
7412; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
7413; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
7414; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7415; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
7416; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7417; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7418; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7419; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7420; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
7421; SKIP-CACHE-INV-NEXT:    s_endpgm
7422;
7423; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7424; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
7425; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7426; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7427; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7428; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7429; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7430; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7431; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7432; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7433; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7434; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7435; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7436; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
7437; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
7438; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7439; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
7440;
7441; GFX90A-TGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7442; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
7443; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7444; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7445; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7446; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7447; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7448; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7449; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7450; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7451; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7452; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7453; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7454; GFX90A-TGSPLIT-NEXT:    buffer_invl2
7455; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
7456; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7457; GFX90A-TGSPLIT-NEXT:    s_endpgm
7458;
7459; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7460; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
7461; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7462; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7463; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7464; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7465; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7466; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7467; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7468; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7469; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7470; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
7471; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7472; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
7473; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7474; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
7475;
7476; GFX940-TGSPLIT-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7477; GFX940-TGSPLIT:       ; %bb.0: ; %entry
7478; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7479; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7480; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7481; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7482; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7483; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7484; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7485; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7486; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7487; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
7488; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7489; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
7490; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7491; GFX940-TGSPLIT-NEXT:    s_endpgm
7492;
7493; GFX11-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7494; GFX11-WGP:       ; %bb.0: ; %entry
7495; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
7496; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7497; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7498; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7499; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7500; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
7501; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
7502; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7503; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
7504; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7505; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
7506; GFX11-WGP-NEXT:    buffer_gl1_inv
7507; GFX11-WGP-NEXT:    buffer_gl0_inv
7508; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7509; GFX11-WGP-NEXT:    s_endpgm
7510;
7511; GFX11-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7512; GFX11-CU:       ; %bb.0: ; %entry
7513; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
7514; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7515; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7516; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7517; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
7518; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
7519; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
7520; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7521; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
7522; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7523; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
7524; GFX11-CU-NEXT:    buffer_gl1_inv
7525; GFX11-CU-NEXT:    buffer_gl0_inv
7526; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7527; GFX11-CU-NEXT:    s_endpgm
7528;
7529; GFX12-WGP-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7530; GFX12-WGP:       ; %bb.0: ; %entry
7531; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
7532; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7533; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7534; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7535; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
7536; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
7537; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
7538; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7539; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
7540; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
7541; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
7542; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
7543; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7544; GFX12-WGP-NEXT:    s_endpgm
7545;
7546; GFX12-CU-LABEL: global_system_acquire_acquire_ret_cmpxchg:
7547; GFX12-CU:       ; %bb.0: ; %entry
7548; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
7549; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7550; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7551; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7552; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
7553; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
7554; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
7555; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7556; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
7557; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
7558; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
7559; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
7560; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7561; GFX12-CU-NEXT:    s_endpgm
7562    ptr addrspace(1) %out, i32 %in, i32 %old) {
7563entry:
  ; Address of the i32 element at index 4 from %out; the expected output above
  ; shows this as a 16-byte offset on the atomic instruction.
7564  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and swap in %in. The first ordering (acquire) applies
  ; on success and the second (acquire) on failure; no syncscope is given.
7565  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire acquire
  ; Take the i32 member (index 0) of the { i32, i1 } result and store it to
  ; %out; the i1 member is not used by this test.
7566  %val0 = extractvalue { i32, i1 } %val, 0
7567  store i32 %val0, ptr addrspace(1) %out, align 4
7568  ret void
7569}
7570
7571define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
7572; GFX6-LABEL: global_system_release_acquire_ret_cmpxchg:
7573; GFX6:       ; %bb.0: ; %entry
7574; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
7575; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7576; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
7577; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
7578; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7579; GFX6-NEXT:    s_mov_b32 s12, s5
7580; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
7581; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
7582; GFX6-NEXT:    s_mov_b32 s11, -1
7583; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
7584; GFX6-NEXT:    s_mov_b32 s5, s12
7585; GFX6-NEXT:    s_mov_b32 s6, s11
7586; GFX6-NEXT:    s_mov_b32 s7, s10
7587; GFX6-NEXT:    v_mov_b32_e32 v0, s9
7588; GFX6-NEXT:    v_mov_b32_e32 v2, s8
7589; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7590; GFX6-NEXT:    v_mov_b32_e32 v1, v2
7591; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7592; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7593; GFX6-NEXT:    s_waitcnt vmcnt(0)
7594; GFX6-NEXT:    buffer_wbinvl1
7595; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7596; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
7597; GFX6-NEXT:    s_endpgm
7598;
7599; GFX7-LABEL: global_system_release_acquire_ret_cmpxchg:
7600; GFX7:       ; %bb.0: ; %entry
7601; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
7602; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7603; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
7604; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
7605; GFX7-NEXT:    s_mov_b64 s[12:13], 16
7606; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
7607; GFX7-NEXT:    s_mov_b32 s6, s4
7608; GFX7-NEXT:    s_mov_b32 s7, s5
7609; GFX7-NEXT:    s_mov_b32 s11, s12
7610; GFX7-NEXT:    s_mov_b32 s10, s13
7611; GFX7-NEXT:    s_add_u32 s6, s6, s11
7612; GFX7-NEXT:    s_addc_u32 s10, s7, s10
7613; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
7614; GFX7-NEXT:    s_mov_b32 s7, s10
7615; GFX7-NEXT:    v_mov_b32_e32 v2, s9
7616; GFX7-NEXT:    v_mov_b32_e32 v0, s8
7617; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7618; GFX7-NEXT:    v_mov_b32_e32 v3, v0
7619; GFX7-NEXT:    v_mov_b32_e32 v0, s6
7620; GFX7-NEXT:    v_mov_b32_e32 v1, s7
7621; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7622; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7623; GFX7-NEXT:    s_waitcnt vmcnt(0)
7624; GFX7-NEXT:    buffer_wbinvl1_vol
7625; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7626; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7627; GFX7-NEXT:    flat_store_dword v[0:1], v2
7628; GFX7-NEXT:    s_endpgm
7629;
7630; GFX10-WGP-LABEL: global_system_release_acquire_ret_cmpxchg:
7631; GFX10-WGP:       ; %bb.0: ; %entry
7632; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
7633; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7634; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
7635; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
7636; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7637; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
7638; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
7639; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7640; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
7641; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7642; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
7643; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7644; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
7645; GFX10-WGP-NEXT:    buffer_gl1_inv
7646; GFX10-WGP-NEXT:    buffer_gl0_inv
7647; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
7648; GFX10-WGP-NEXT:    s_endpgm
7649;
7650; GFX10-CU-LABEL: global_system_release_acquire_ret_cmpxchg:
7651; GFX10-CU:       ; %bb.0: ; %entry
7652; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
7653; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7654; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
7655; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
7656; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
7657; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
7658; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
7659; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7660; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
7661; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7662; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
7663; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7664; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
7665; GFX10-CU-NEXT:    buffer_gl1_inv
7666; GFX10-CU-NEXT:    buffer_gl0_inv
7667; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
7668; GFX10-CU-NEXT:    s_endpgm
7669;
7670; SKIP-CACHE-INV-LABEL: global_system_release_acquire_ret_cmpxchg:
7671; SKIP-CACHE-INV:       ; %bb.0: ; %entry
7672; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
7673; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
7674; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
7675; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
7676; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
7677; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
7678; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
7679; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
7680; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
7681; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
7682; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
7683; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
7684; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
7685; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
7686; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
7687; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7688; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
7689; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7690; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7691; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7692; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7693; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7694; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
7695; SKIP-CACHE-INV-NEXT:    s_endpgm
7696;
7697; GFX90A-NOTTGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg:
7698; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
7699; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7700; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7701; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7702; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7703; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7704; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7705; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7706; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7707; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7708; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
7709; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7710; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7711; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7712; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
7713; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
7714; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7715; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
7716;
7717; GFX90A-TGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg:
7718; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
7719; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7720; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7721; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
7722; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
7723; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7724; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
7725; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
7726; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7727; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7728; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
7729; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7730; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
7731; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7732; GFX90A-TGSPLIT-NEXT:    buffer_invl2
7733; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
7734; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
7735; GFX90A-TGSPLIT-NEXT:    s_endpgm
7736;
7737; GFX940-NOTTGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg:
7738; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
7739; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7740; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7741; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7742; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7743; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7744; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7745; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7746; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7747; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7748; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
7749; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7750; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
7751; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7752; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
7753; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7754; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
7755;
7756; GFX940-TGSPLIT-LABEL: global_system_release_acquire_ret_cmpxchg:
7757; GFX940-TGSPLIT:       ; %bb.0: ; %entry
7758; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
7759; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
7760; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
7761; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
7762; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
7763; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
7764; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
7765; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7766; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
7767; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
7768; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7769; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
7770; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
7771; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
7772; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
7773; GFX940-TGSPLIT-NEXT:    s_endpgm
7774;
7775; GFX11-WGP-LABEL: global_system_release_acquire_ret_cmpxchg:
7776; GFX11-WGP:       ; %bb.0: ; %entry
7777; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
7778; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7779; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7780; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7781; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7782; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
7783; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
7784; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7785; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
7786; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7787; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
7788; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7789; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
7790; GFX11-WGP-NEXT:    buffer_gl1_inv
7791; GFX11-WGP-NEXT:    buffer_gl0_inv
7792; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7793; GFX11-WGP-NEXT:    s_endpgm
7794;
7795; GFX11-CU-LABEL: global_system_release_acquire_ret_cmpxchg:
7796; GFX11-CU:       ; %bb.0: ; %entry
7797; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
7798; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7799; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7800; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7801; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
7802; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
7803; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
7804; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7805; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
7806; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7807; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
7808; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
7809; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
7810; GFX11-CU-NEXT:    buffer_gl1_inv
7811; GFX11-CU-NEXT:    buffer_gl0_inv
7812; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7813; GFX11-CU-NEXT:    s_endpgm
7814;
7815; GFX12-WGP-LABEL: global_system_release_acquire_ret_cmpxchg:
7816; GFX12-WGP:       ; %bb.0: ; %entry
7817; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
7818; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7819; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
7820; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
7821; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
7822; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
7823; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
7824; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7825; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
7826; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
7827; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
7828; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
7829; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
7830; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
7831; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
7832; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
7833; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
7834; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
7835; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
7836; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
7837; GFX12-WGP-NEXT:    s_endpgm
7838;
7839; GFX12-CU-LABEL: global_system_release_acquire_ret_cmpxchg:
7840; GFX12-CU:       ; %bb.0: ; %entry
7841; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
7842; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
7843; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
7844; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
7845; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
7846; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
7847; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
7848; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7849; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
7850; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
7851; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
7852; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
7853; GFX12-CU-NEXT:    s_wait_storecnt 0x0
7854; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
7855; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
7856; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
7857; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
7858; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
7859; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
7860; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
7861; GFX12-CU-NEXT:    s_endpgm
7862    ptr addrspace(1) %out, i32 %in, i32 %old) {
7863entry:
7864  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
7865  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release acquire
7866  %val0 = extractvalue { i32, i1 } %val, 0
7867  store i32 %val0, ptr addrspace(1) %out, align 4
7868  ret void
7869}
7870
; Test case: a volatile cmpxchg on a global (addrspace(1)) pointer with success
; ordering acq_rel and failure ordering acquire. The cmpxchg carries no
; syncscope, so the LLVM default (system) scope applies. The value loaded by the
; cmpxchg is stored back to %out, so the result of the atomic is used (the
; "_ret_" variant). The per-target assertion blocks between the signature and
; the body are generated by utils/update_llc_test_checks.py; regenerate them
; with that script rather than editing them by hand.
7871define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
7872; GFX6-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
7873; GFX6:       ; %bb.0: ; %entry
7874; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
7875; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7876; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
7877; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
7878; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
7879; GFX6-NEXT:    s_mov_b32 s12, s5
7880; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
7881; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
7882; GFX6-NEXT:    s_mov_b32 s11, -1
7883; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
7884; GFX6-NEXT:    s_mov_b32 s5, s12
7885; GFX6-NEXT:    s_mov_b32 s6, s11
7886; GFX6-NEXT:    s_mov_b32 s7, s10
7887; GFX6-NEXT:    v_mov_b32_e32 v0, s9
7888; GFX6-NEXT:    v_mov_b32_e32 v2, s8
7889; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7890; GFX6-NEXT:    v_mov_b32_e32 v1, v2
7891; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7892; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
7893; GFX6-NEXT:    s_waitcnt vmcnt(0)
7894; GFX6-NEXT:    buffer_wbinvl1
7895; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7896; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
7897; GFX6-NEXT:    s_endpgm
7898;
7899; GFX7-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
7900; GFX7:       ; %bb.0: ; %entry
7901; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
7902; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
7903; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
7904; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
7905; GFX7-NEXT:    s_mov_b64 s[12:13], 16
7906; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
7907; GFX7-NEXT:    s_mov_b32 s6, s4
7908; GFX7-NEXT:    s_mov_b32 s7, s5
7909; GFX7-NEXT:    s_mov_b32 s11, s12
7910; GFX7-NEXT:    s_mov_b32 s10, s13
7911; GFX7-NEXT:    s_add_u32 s6, s6, s11
7912; GFX7-NEXT:    s_addc_u32 s10, s7, s10
7913; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
7914; GFX7-NEXT:    s_mov_b32 s7, s10
7915; GFX7-NEXT:    v_mov_b32_e32 v2, s9
7916; GFX7-NEXT:    v_mov_b32_e32 v0, s8
7917; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
7918; GFX7-NEXT:    v_mov_b32_e32 v3, v0
7919; GFX7-NEXT:    v_mov_b32_e32 v0, s6
7920; GFX7-NEXT:    v_mov_b32_e32 v1, s7
7921; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7922; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
7923; GFX7-NEXT:    s_waitcnt vmcnt(0)
7924; GFX7-NEXT:    buffer_wbinvl1_vol
7925; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7926; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7927; GFX7-NEXT:    flat_store_dword v[0:1], v2
7928; GFX7-NEXT:    s_endpgm
7929;
7930; GFX10-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
7931; GFX10-WGP:       ; %bb.0: ; %entry
7932; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
7933; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7934; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
7935; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
7936; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
7937; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
7938; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
7939; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7940; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
7941; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7942; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
7943; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7944; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
7945; GFX10-WGP-NEXT:    buffer_gl1_inv
7946; GFX10-WGP-NEXT:    buffer_gl0_inv
7947; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
7948; GFX10-WGP-NEXT:    s_endpgm
7949;
7950; GFX10-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
7951; GFX10-CU:       ; %bb.0: ; %entry
7952; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
7953; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
7954; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
7955; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
7956; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
7957; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
7958; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
7959; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
7960; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
7961; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7962; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
7963; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
7964; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
7965; GFX10-CU-NEXT:    buffer_gl1_inv
7966; GFX10-CU-NEXT:    buffer_gl0_inv
7967; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
7968; GFX10-CU-NEXT:    s_endpgm
7969;
7970; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
7971; SKIP-CACHE-INV:       ; %bb.0: ; %entry
7972; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
7973; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
7974; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
7975; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
7976; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
7977; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
7978; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
7979; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
7980; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
7981; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
7982; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
7983; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
7984; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
7985; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
7986; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
7987; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
7988; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
7989; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7990; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
7991; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7992; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
7993; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
7994; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
7995; SKIP-CACHE-INV-NEXT:    s_endpgm
7996;
7997; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
7998; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
7999; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8000; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8001; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8002; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8003; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8004; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8005; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8006; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8007; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8008; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
8009; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8010; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8011; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8012; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
8013; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
8014; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8015; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
8016;
8017; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
8018; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
8019; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8020; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8021; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8022; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8023; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8024; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8025; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8026; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8027; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8028; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
8029; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8030; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8031; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8032; GFX90A-TGSPLIT-NEXT:    buffer_invl2
8033; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
8034; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8035; GFX90A-TGSPLIT-NEXT:    s_endpgm
8036;
8037; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
8038; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
8039; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8040; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8041; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8042; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8043; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8044; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8045; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8046; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8047; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8048; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
8049; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8050; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
8051; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8052; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
8053; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8054; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
8055;
8056; GFX940-TGSPLIT-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
8057; GFX940-TGSPLIT:       ; %bb.0: ; %entry
8058; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8059; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8060; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8061; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8062; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8063; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8064; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8065; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8066; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8067; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
8068; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8069; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
8070; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8071; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
8072; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8073; GFX940-TGSPLIT-NEXT:    s_endpgm
8074;
8075; GFX11-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
8076; GFX11-WGP:       ; %bb.0: ; %entry
8077; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
8078; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8079; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8080; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8081; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8082; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
8083; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
8084; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8085; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
8086; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8087; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8088; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8089; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
8090; GFX11-WGP-NEXT:    buffer_gl1_inv
8091; GFX11-WGP-NEXT:    buffer_gl0_inv
8092; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8093; GFX11-WGP-NEXT:    s_endpgm
8094;
8095; GFX11-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
8096; GFX11-CU:       ; %bb.0: ; %entry
8097; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
8098; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8099; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8100; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8101; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
8102; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
8103; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
8104; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8105; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
8106; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8107; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8108; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8109; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
8110; GFX11-CU-NEXT:    buffer_gl1_inv
8111; GFX11-CU-NEXT:    buffer_gl0_inv
8112; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8113; GFX11-CU-NEXT:    s_endpgm
8114;
8115; GFX12-WGP-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
8116; GFX12-WGP:       ; %bb.0: ; %entry
8117; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
8118; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8119; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8120; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8121; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
8122; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
8123; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
8124; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8125; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
8126; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
8127; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8128; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8129; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
8130; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
8131; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
8132; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8133; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8134; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
8135; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
8136; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8137; GFX12-WGP-NEXT:    s_endpgm
8138;
8139; GFX12-CU-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
8140; GFX12-CU:       ; %bb.0: ; %entry
8141; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
8142; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8143; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8144; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8145; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
8146; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
8147; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
8148; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8149; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
8150; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
8151; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8152; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8153; GFX12-CU-NEXT:    s_wait_storecnt 0x0
8154; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
8155; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
8156; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8157; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8158; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
8159; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
8160; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8161; GFX12-CU-NEXT:    s_endpgm
8162    ptr addrspace(1) %out, i32 %in, i32 %old) {
8163entry:
  ; Address of the fifth i32 past %out (element index 4, i.e. 16 bytes); the
  ; expected output shows this as an immediate offset of 16 on the atomic, or
  ; as an explicit 64-bit add of 16 where the atomic takes no offset.
8164  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old, swap in %in. Orderings are acq_rel on success and
  ; acquire on failure.
8165  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel acquire
  ; Field 0 of the cmpxchg result is the value read from memory; field 1 (the
  ; success flag) is not used by this test.
8166  %val0 = extractvalue { i32, i1 } %val, 0
  ; Plain (non-atomic) store of the returned value to the base pointer %out.
8167  store i32 %val0, ptr addrspace(1) %out, align 4
8168  ret void
8169}
8170
8171define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
8172; GFX6-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8173; GFX6:       ; %bb.0: ; %entry
8174; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
8175; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8176; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
8177; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
8178; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
8179; GFX6-NEXT:    s_mov_b32 s12, s5
8180; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
8181; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
8182; GFX6-NEXT:    s_mov_b32 s11, -1
8183; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
8184; GFX6-NEXT:    s_mov_b32 s5, s12
8185; GFX6-NEXT:    s_mov_b32 s6, s11
8186; GFX6-NEXT:    s_mov_b32 s7, s10
8187; GFX6-NEXT:    v_mov_b32_e32 v0, s9
8188; GFX6-NEXT:    v_mov_b32_e32 v2, s8
8189; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8190; GFX6-NEXT:    v_mov_b32_e32 v1, v2
8191; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8192; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8193; GFX6-NEXT:    s_waitcnt vmcnt(0)
8194; GFX6-NEXT:    buffer_wbinvl1
8195; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8196; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
8197; GFX6-NEXT:    s_endpgm
8198;
8199; GFX7-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8200; GFX7:       ; %bb.0: ; %entry
8201; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
8202; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8203; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
8204; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
8205; GFX7-NEXT:    s_mov_b64 s[12:13], 16
8206; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8207; GFX7-NEXT:    s_mov_b32 s6, s4
8208; GFX7-NEXT:    s_mov_b32 s7, s5
8209; GFX7-NEXT:    s_mov_b32 s11, s12
8210; GFX7-NEXT:    s_mov_b32 s10, s13
8211; GFX7-NEXT:    s_add_u32 s6, s6, s11
8212; GFX7-NEXT:    s_addc_u32 s10, s7, s10
8213; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
8214; GFX7-NEXT:    s_mov_b32 s7, s10
8215; GFX7-NEXT:    v_mov_b32_e32 v2, s9
8216; GFX7-NEXT:    v_mov_b32_e32 v0, s8
8217; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8218; GFX7-NEXT:    v_mov_b32_e32 v3, v0
8219; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8220; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8221; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8222; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8223; GFX7-NEXT:    s_waitcnt vmcnt(0)
8224; GFX7-NEXT:    buffer_wbinvl1_vol
8225; GFX7-NEXT:    v_mov_b32_e32 v0, s4
8226; GFX7-NEXT:    v_mov_b32_e32 v1, s5
8227; GFX7-NEXT:    flat_store_dword v[0:1], v2
8228; GFX7-NEXT:    s_endpgm
8229;
8230; GFX10-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8231; GFX10-WGP:       ; %bb.0: ; %entry
8232; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
8233; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8234; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
8235; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
8236; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8237; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
8238; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
8239; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8240; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
8241; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8242; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8243; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8244; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
8245; GFX10-WGP-NEXT:    buffer_gl1_inv
8246; GFX10-WGP-NEXT:    buffer_gl0_inv
8247; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
8248; GFX10-WGP-NEXT:    s_endpgm
8249;
8250; GFX10-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8251; GFX10-CU:       ; %bb.0: ; %entry
8252; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
8253; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8254; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
8255; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
8256; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
8257; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
8258; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
8259; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8260; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
8261; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8262; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8263; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8264; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
8265; GFX10-CU-NEXT:    buffer_gl1_inv
8266; GFX10-CU-NEXT:    buffer_gl0_inv
8267; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
8268; GFX10-CU-NEXT:    s_endpgm
8269;
8270; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8271; SKIP-CACHE-INV:       ; %bb.0: ; %entry
8272; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
8273; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
8274; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
8275; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
8276; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
8277; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
8278; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
8279; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
8280; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
8281; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
8282; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
8283; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
8284; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
8285; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
8286; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
8287; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8288; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
8289; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8290; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8291; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8292; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8293; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8294; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
8295; SKIP-CACHE-INV-NEXT:    s_endpgm
8296;
8297; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8298; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
8299; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8300; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8301; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8302; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8303; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8304; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8305; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8306; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8307; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8308; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
8309; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8310; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8311; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8312; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
8313; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
8314; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8315; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
8316;
8317; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8318; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
8319; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8320; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8321; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8322; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8323; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8324; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8325; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8326; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8327; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8328; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
8329; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8330; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8331; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8332; GFX90A-TGSPLIT-NEXT:    buffer_invl2
8333; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
8334; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8335; GFX90A-TGSPLIT-NEXT:    s_endpgm
8336;
8337; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8338; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
8339; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8340; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8341; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8342; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8343; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8344; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8345; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8346; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8347; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8348; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
8349; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8350; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
8351; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8352; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
8353; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8354; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
8355;
8356; GFX940-TGSPLIT-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8357; GFX940-TGSPLIT:       ; %bb.0: ; %entry
8358; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8359; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8360; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8361; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8362; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8363; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8364; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8365; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8366; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8367; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
8368; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8369; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
8370; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8371; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
8372; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8373; GFX940-TGSPLIT-NEXT:    s_endpgm
8374;
8375; GFX11-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8376; GFX11-WGP:       ; %bb.0: ; %entry
8377; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
8378; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8379; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8380; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8381; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8382; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
8383; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
8384; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8385; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
8386; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8387; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8388; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8389; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
8390; GFX11-WGP-NEXT:    buffer_gl1_inv
8391; GFX11-WGP-NEXT:    buffer_gl0_inv
8392; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8393; GFX11-WGP-NEXT:    s_endpgm
8394;
8395; GFX11-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8396; GFX11-CU:       ; %bb.0: ; %entry
8397; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
8398; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8399; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8400; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8401; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
8402; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
8403; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
8404; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8405; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
8406; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8407; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8408; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8409; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
8410; GFX11-CU-NEXT:    buffer_gl1_inv
8411; GFX11-CU-NEXT:    buffer_gl0_inv
8412; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8413; GFX11-CU-NEXT:    s_endpgm
8414;
8415; GFX12-WGP-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8416; GFX12-WGP:       ; %bb.0: ; %entry
8417; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
8418; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8419; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8420; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8421; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
8422; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
8423; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
8424; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8425; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
8426; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
8427; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8428; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8429; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
8430; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
8431; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
8432; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8433; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8434; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
8435; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
8436; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8437; GFX12-WGP-NEXT:    s_endpgm
8438;
8439; GFX12-CU-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
8440; GFX12-CU:       ; %bb.0: ; %entry
8441; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
8442; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8443; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8444; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8445; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
8446; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
8447; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
8448; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8449; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
8450; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
8451; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8452; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8453; GFX12-CU-NEXT:    s_wait_storecnt 0x0
8454; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
8455; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
8456; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8457; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8458; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
8459; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
8460; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8461; GFX12-CU-NEXT:    s_endpgm
8462    ptr addrspace(1) %out, i32 %in, i32 %old) {
8463entry:
8464  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
8465  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst acquire
8466  %val0 = extractvalue { i32, i1 } %val, 0
8467  store i32 %val0, ptr addrspace(1) %out, align 4
8468  ret void
8469}
8470
; Returning cmpxchg on global memory (addrspace(1)) with no syncscope on the
; instruction (the test name labels this as system scope), success ordering
; monotonic and failure ordering seq_cst.
; The kernel compares the i32 at element 4 of %out with %old, writes %in on a
; match, and stores the value that was loaded by the cmpxchg back to %out.
; The per-target assertions that follow are autogenerated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
8471define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg(
8472; GFX6-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8473; GFX6:       ; %bb.0: ; %entry
8474; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
8475; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8476; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
8477; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
8478; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
8479; GFX6-NEXT:    s_mov_b32 s12, s5
8480; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
8481; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
8482; GFX6-NEXT:    s_mov_b32 s11, -1
8483; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
8484; GFX6-NEXT:    s_mov_b32 s5, s12
8485; GFX6-NEXT:    s_mov_b32 s6, s11
8486; GFX6-NEXT:    s_mov_b32 s7, s10
8487; GFX6-NEXT:    v_mov_b32_e32 v0, s9
8488; GFX6-NEXT:    v_mov_b32_e32 v2, s8
8489; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8490; GFX6-NEXT:    v_mov_b32_e32 v1, v2
8491; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8492; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8493; GFX6-NEXT:    s_waitcnt vmcnt(0)
8494; GFX6-NEXT:    buffer_wbinvl1
8495; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8496; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
8497; GFX6-NEXT:    s_endpgm
8498;
8499; GFX7-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8500; GFX7:       ; %bb.0: ; %entry
8501; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
8502; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8503; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
8504; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
8505; GFX7-NEXT:    s_mov_b64 s[12:13], 16
8506; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8507; GFX7-NEXT:    s_mov_b32 s6, s4
8508; GFX7-NEXT:    s_mov_b32 s7, s5
8509; GFX7-NEXT:    s_mov_b32 s11, s12
8510; GFX7-NEXT:    s_mov_b32 s10, s13
8511; GFX7-NEXT:    s_add_u32 s6, s6, s11
8512; GFX7-NEXT:    s_addc_u32 s10, s7, s10
8513; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
8514; GFX7-NEXT:    s_mov_b32 s7, s10
8515; GFX7-NEXT:    v_mov_b32_e32 v2, s9
8516; GFX7-NEXT:    v_mov_b32_e32 v0, s8
8517; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8518; GFX7-NEXT:    v_mov_b32_e32 v3, v0
8519; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8520; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8521; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8522; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8523; GFX7-NEXT:    s_waitcnt vmcnt(0)
8524; GFX7-NEXT:    buffer_wbinvl1_vol
8525; GFX7-NEXT:    v_mov_b32_e32 v0, s4
8526; GFX7-NEXT:    v_mov_b32_e32 v1, s5
8527; GFX7-NEXT:    flat_store_dword v[0:1], v2
8528; GFX7-NEXT:    s_endpgm
8529;
8530; GFX10-WGP-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8531; GFX10-WGP:       ; %bb.0: ; %entry
8532; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
8533; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8534; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
8535; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
8536; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8537; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
8538; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
8539; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8540; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
8541; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8542; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8543; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8544; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
8545; GFX10-WGP-NEXT:    buffer_gl1_inv
8546; GFX10-WGP-NEXT:    buffer_gl0_inv
8547; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
8548; GFX10-WGP-NEXT:    s_endpgm
8549;
8550; GFX10-CU-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8551; GFX10-CU:       ; %bb.0: ; %entry
8552; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
8553; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8554; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
8555; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
8556; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
8557; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
8558; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
8559; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8560; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
8561; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8562; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8563; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8564; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
8565; GFX10-CU-NEXT:    buffer_gl1_inv
8566; GFX10-CU-NEXT:    buffer_gl0_inv
8567; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
8568; GFX10-CU-NEXT:    s_endpgm
8569;
8570; SKIP-CACHE-INV-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8571; SKIP-CACHE-INV:       ; %bb.0: ; %entry
8572; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
8573; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
8574; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
8575; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
8576; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
8577; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
8578; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
8579; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
8580; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
8581; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
8582; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
8583; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
8584; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
8585; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
8586; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
8587; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8588; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
8589; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8590; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8591; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8592; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8593; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8594; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
8595; SKIP-CACHE-INV-NEXT:    s_endpgm
8596;
8597; GFX90A-NOTTGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8598; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
8599; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8600; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8601; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8602; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8603; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8604; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8605; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8606; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8607; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8608; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
8609; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8610; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8611; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8612; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
8613; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
8614; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8615; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
8616;
8617; GFX90A-TGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8618; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
8619; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8620; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8621; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8622; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8623; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8624; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8625; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8626; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8627; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8628; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
8629; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8630; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8631; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8632; GFX90A-TGSPLIT-NEXT:    buffer_invl2
8633; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
8634; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8635; GFX90A-TGSPLIT-NEXT:    s_endpgm
8636;
8637; GFX940-NOTTGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8638; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
8639; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8640; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8641; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8642; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8643; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8644; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8645; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8646; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8647; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8648; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
8649; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8650; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
8651; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8652; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
8653; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8654; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
8655;
8656; GFX940-TGSPLIT-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8657; GFX940-TGSPLIT:       ; %bb.0: ; %entry
8658; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8659; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8660; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8661; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8662; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8663; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8664; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8665; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8666; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8667; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
8668; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8669; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
8670; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8671; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
8672; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8673; GFX940-TGSPLIT-NEXT:    s_endpgm
8674;
8675; GFX11-WGP-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8676; GFX11-WGP:       ; %bb.0: ; %entry
8677; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
8678; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8679; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8680; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8681; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8682; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
8683; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
8684; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8685; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
8686; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8687; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8688; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8689; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
8690; GFX11-WGP-NEXT:    buffer_gl1_inv
8691; GFX11-WGP-NEXT:    buffer_gl0_inv
8692; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8693; GFX11-WGP-NEXT:    s_endpgm
8694;
8695; GFX11-CU-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8696; GFX11-CU:       ; %bb.0: ; %entry
8697; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
8698; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8699; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8700; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8701; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
8702; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
8703; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
8704; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8705; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
8706; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8707; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8708; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8709; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
8710; GFX11-CU-NEXT:    buffer_gl1_inv
8711; GFX11-CU-NEXT:    buffer_gl0_inv
8712; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8713; GFX11-CU-NEXT:    s_endpgm
8714;
8715; GFX12-WGP-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8716; GFX12-WGP:       ; %bb.0: ; %entry
8717; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
8718; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8719; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8720; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8721; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
8722; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
8723; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
8724; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8725; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
8726; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
8727; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8728; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8729; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
8730; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
8731; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
8732; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
8733; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
8734; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
8735; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
8736; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8737; GFX12-WGP-NEXT:    s_endpgm
8738;
8739; GFX12-CU-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
8740; GFX12-CU:       ; %bb.0: ; %entry
8741; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
8742; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8743; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
8744; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
8745; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
8746; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
8747; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
8748; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8749; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
8750; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
8751; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8752; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8753; GFX12-CU-NEXT:    s_wait_storecnt 0x0
8754; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
8755; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
8756; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
8757; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
8758; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
8759; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
8760; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
8761; GFX12-CU-NEXT:    s_endpgm
8762    ptr addrspace(1) %out, i32 %in, i32 %old) {
8763entry:
; i32 element index 4, i.e. 16 bytes past %out; this is the offset:16 seen on
; the checked atomics that take an immediate offset.
8764  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare the value at %gep with %old and write %in on a match.
; Orderings are listed success first, then failure (monotonic, seq_cst).
8765  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in monotonic seq_cst
; Field 0 of the result pair is the value loaded from memory; the i1 success
; flag in field 1 is not used by this test.
8766  %val0 = extractvalue { i32, i1 } %val, 0
; Plain (non-atomic) store of the loaded value to the start of %out.
8767  store i32 %val0, ptr addrspace(1) %out, align 4
8768  ret void
8769}
8770
8771define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg(
8772; GFX6-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8773; GFX6:       ; %bb.0: ; %entry
8774; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
8775; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8776; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
8777; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
8778; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
8779; GFX6-NEXT:    s_mov_b32 s12, s5
8780; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
8781; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
8782; GFX6-NEXT:    s_mov_b32 s11, -1
8783; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
8784; GFX6-NEXT:    s_mov_b32 s5, s12
8785; GFX6-NEXT:    s_mov_b32 s6, s11
8786; GFX6-NEXT:    s_mov_b32 s7, s10
8787; GFX6-NEXT:    v_mov_b32_e32 v0, s9
8788; GFX6-NEXT:    v_mov_b32_e32 v2, s8
8789; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8790; GFX6-NEXT:    v_mov_b32_e32 v1, v2
8791; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8792; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
8793; GFX6-NEXT:    s_waitcnt vmcnt(0)
8794; GFX6-NEXT:    buffer_wbinvl1
8795; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8796; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
8797; GFX6-NEXT:    s_endpgm
8798;
8799; GFX7-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8800; GFX7:       ; %bb.0: ; %entry
8801; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
8802; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
8803; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
8804; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
8805; GFX7-NEXT:    s_mov_b64 s[12:13], 16
8806; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8807; GFX7-NEXT:    s_mov_b32 s6, s4
8808; GFX7-NEXT:    s_mov_b32 s7, s5
8809; GFX7-NEXT:    s_mov_b32 s11, s12
8810; GFX7-NEXT:    s_mov_b32 s10, s13
8811; GFX7-NEXT:    s_add_u32 s6, s6, s11
8812; GFX7-NEXT:    s_addc_u32 s10, s7, s10
8813; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
8814; GFX7-NEXT:    s_mov_b32 s7, s10
8815; GFX7-NEXT:    v_mov_b32_e32 v2, s9
8816; GFX7-NEXT:    v_mov_b32_e32 v0, s8
8817; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8818; GFX7-NEXT:    v_mov_b32_e32 v3, v0
8819; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8820; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8821; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8822; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
8823; GFX7-NEXT:    s_waitcnt vmcnt(0)
8824; GFX7-NEXT:    buffer_wbinvl1_vol
8825; GFX7-NEXT:    v_mov_b32_e32 v0, s4
8826; GFX7-NEXT:    v_mov_b32_e32 v1, s5
8827; GFX7-NEXT:    flat_store_dword v[0:1], v2
8828; GFX7-NEXT:    s_endpgm
8829;
8830; GFX10-WGP-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8831; GFX10-WGP:       ; %bb.0: ; %entry
8832; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
8833; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8834; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
8835; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
8836; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8837; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
8838; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
8839; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8840; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
8841; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8842; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8843; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8844; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
8845; GFX10-WGP-NEXT:    buffer_gl1_inv
8846; GFX10-WGP-NEXT:    buffer_gl0_inv
8847; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
8848; GFX10-WGP-NEXT:    s_endpgm
8849;
8850; GFX10-CU-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8851; GFX10-CU:       ; %bb.0: ; %entry
8852; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
8853; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8854; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
8855; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
8856; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
8857; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
8858; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
8859; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8860; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
8861; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8862; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
8863; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
8864; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
8865; GFX10-CU-NEXT:    buffer_gl1_inv
8866; GFX10-CU-NEXT:    buffer_gl0_inv
8867; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
8868; GFX10-CU-NEXT:    s_endpgm
8869;
8870; SKIP-CACHE-INV-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8871; SKIP-CACHE-INV:       ; %bb.0: ; %entry
8872; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
8873; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
8874; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
8875; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
8876; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
8877; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
8878; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
8879; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
8880; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
8881; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
8882; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
8883; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
8884; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
8885; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
8886; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
8887; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
8888; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
8889; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8890; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
8891; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8892; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
8893; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
8894; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
8895; SKIP-CACHE-INV-NEXT:    s_endpgm
8896;
8897; GFX90A-NOTTGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8898; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
8899; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8900; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8901; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8902; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8903; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8904; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8905; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8906; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8907; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8908; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
8909; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8910; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8911; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8912; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
8913; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
8914; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8915; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
8916;
8917; GFX90A-TGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8918; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
8919; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8920; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
8921; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
8922; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
8923; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8924; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
8925; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
8926; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8927; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8928; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
8929; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8930; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
8931; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8932; GFX90A-TGSPLIT-NEXT:    buffer_invl2
8933; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
8934; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
8935; GFX90A-TGSPLIT-NEXT:    s_endpgm
8936;
8937; GFX940-NOTTGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8938; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
8939; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8940; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8941; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8942; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8943; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8944; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8945; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8946; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8947; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8948; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
8949; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8950; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
8951; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8952; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
8953; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8954; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
8955;
8956; GFX940-TGSPLIT-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8957; GFX940-TGSPLIT:       ; %bb.0: ; %entry
8958; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
8959; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
8960; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
8961; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
8962; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
8963; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
8964; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
8965; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
8966; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
8967; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
8968; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8969; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
8970; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
8971; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
8972; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
8973; GFX940-TGSPLIT-NEXT:    s_endpgm
8974;
8975; GFX11-WGP-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8976; GFX11-WGP:       ; %bb.0: ; %entry
8977; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
8978; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8979; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
8980; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
8981; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
8982; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
8983; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
8984; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
8985; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
8986; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8987; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
8988; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
8989; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
8990; GFX11-WGP-NEXT:    buffer_gl1_inv
8991; GFX11-WGP-NEXT:    buffer_gl0_inv
8992; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
8993; GFX11-WGP-NEXT:    s_endpgm
8994;
8995; GFX11-CU-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
8996; GFX11-CU:       ; %bb.0: ; %entry
8997; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
8998; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
8999; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9000; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9001; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
9002; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
9003; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
9004; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9005; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
9006; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9007; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9008; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9009; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
9010; GFX11-CU-NEXT:    buffer_gl1_inv
9011; GFX11-CU-NEXT:    buffer_gl0_inv
9012; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9013; GFX11-CU-NEXT:    s_endpgm
9014;
9015; GFX12-WGP-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
9016; GFX12-WGP:       ; %bb.0: ; %entry
9017; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
9018; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9019; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9020; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9021; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
9022; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
9023; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
9024; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9025; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
9026; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
9027; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9028; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9029; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
9030; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
9031; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
9032; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
9033; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
9034; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9035; GFX12-WGP-NEXT:    s_endpgm
9036;
9037; GFX12-CU-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
9038; GFX12-CU:       ; %bb.0: ; %entry
9039; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
9040; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9041; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9042; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9043; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
9044; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
9045; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
9046; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9047; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
9048; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
9049; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9050; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9051; GFX12-CU-NEXT:    s_wait_storecnt 0x0
9052; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
9053; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
9054; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
9055; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
9056; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9057; GFX12-CU-NEXT:    s_endpgm
9058    ptr addrspace(1) %out, i32 %in, i32 %old) {
9059entry:
9060  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
9061  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acquire seq_cst
9062  %val0 = extractvalue { i32, i1 } %val, 0
9063  store i32 %val0, ptr addrspace(1) %out, align 4
9064  ret void
9065}
9066
; Returning cmpxchg on a global (addrspace(1)) pointer with `release` success
; ordering and `seq_cst` failure ordering. No syncscope is written on the
; instruction; the expected GFX12 output uses scope:SCOPE_SYS.
;
; In the expected output below, each target waits on outstanding memory
; operations before the atomic (gfx90a/gfx940/gfx12 additionally emit a
; write-back), and waits again after it. Every run line except the one using
; -amdgcn-skip-cache-invalidations also expects a cache invalidate after the
; atomic.
;
; NOTE(review): "relese" in the function name looks like a typo for "release".
; It is left unchanged here because every autogenerated -LABEL line matches on
; the emitted symbol name; renaming requires regenerating the assertions.
9067define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg(
9068; GFX6-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9069; GFX6:       ; %bb.0: ; %entry
9070; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
9071; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9072; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
9073; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
9074; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
9075; GFX6-NEXT:    s_mov_b32 s12, s5
9076; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
9077; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
9078; GFX6-NEXT:    s_mov_b32 s11, -1
9079; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
9080; GFX6-NEXT:    s_mov_b32 s5, s12
9081; GFX6-NEXT:    s_mov_b32 s6, s11
9082; GFX6-NEXT:    s_mov_b32 s7, s10
9083; GFX6-NEXT:    v_mov_b32_e32 v0, s9
9084; GFX6-NEXT:    v_mov_b32_e32 v2, s8
9085; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9086; GFX6-NEXT:    v_mov_b32_e32 v1, v2
9087; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9088; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
9089; GFX6-NEXT:    s_waitcnt vmcnt(0)
9090; GFX6-NEXT:    buffer_wbinvl1
9091; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9092; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9093; GFX6-NEXT:    s_endpgm
9094;
9095; GFX7-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9096; GFX7:       ; %bb.0: ; %entry
9097; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
9098; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9099; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
9100; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
9101; GFX7-NEXT:    s_mov_b64 s[12:13], 16
9102; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
9103; GFX7-NEXT:    s_mov_b32 s6, s4
9104; GFX7-NEXT:    s_mov_b32 s7, s5
9105; GFX7-NEXT:    s_mov_b32 s11, s12
9106; GFX7-NEXT:    s_mov_b32 s10, s13
9107; GFX7-NEXT:    s_add_u32 s6, s6, s11
9108; GFX7-NEXT:    s_addc_u32 s10, s7, s10
9109; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
9110; GFX7-NEXT:    s_mov_b32 s7, s10
9111; GFX7-NEXT:    v_mov_b32_e32 v2, s9
9112; GFX7-NEXT:    v_mov_b32_e32 v0, s8
9113; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9114; GFX7-NEXT:    v_mov_b32_e32 v3, v0
9115; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9116; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9117; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9118; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
9119; GFX7-NEXT:    s_waitcnt vmcnt(0)
9120; GFX7-NEXT:    buffer_wbinvl1_vol
9121; GFX7-NEXT:    v_mov_b32_e32 v0, s4
9122; GFX7-NEXT:    v_mov_b32_e32 v1, s5
9123; GFX7-NEXT:    flat_store_dword v[0:1], v2
9124; GFX7-NEXT:    s_endpgm
9125;
9126; GFX10-WGP-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9127; GFX10-WGP:       ; %bb.0: ; %entry
9128; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
9129; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9130; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
9131; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
9132; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9133; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
9134; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
9135; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9136; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
9137; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9138; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9139; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9140; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
9141; GFX10-WGP-NEXT:    buffer_gl1_inv
9142; GFX10-WGP-NEXT:    buffer_gl0_inv
9143; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
9144; GFX10-WGP-NEXT:    s_endpgm
9145;
9146; GFX10-CU-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9147; GFX10-CU:       ; %bb.0: ; %entry
9148; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
9149; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9150; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
9151; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
9152; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
9153; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
9154; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
9155; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9156; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
9157; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9158; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9159; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9160; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
9161; GFX10-CU-NEXT:    buffer_gl1_inv
9162; GFX10-CU-NEXT:    buffer_gl0_inv
9163; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
9164; GFX10-CU-NEXT:    s_endpgm
9165;
9166; SKIP-CACHE-INV-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9167; SKIP-CACHE-INV:       ; %bb.0: ; %entry
9168; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
9169; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
9170; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
9171; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
9172; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
9173; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
9174; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
9175; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
9176; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
9177; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
9178; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
9179; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
9180; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
9181; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
9182; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
9183; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9184; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
9185; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9186; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
9187; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9188; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9189; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9190; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
9191; SKIP-CACHE-INV-NEXT:    s_endpgm
9192;
9193; GFX90A-NOTTGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9194; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
9195; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9196; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9197; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9198; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9199; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9200; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9201; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9202; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9203; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9204; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
9205; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9206; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9207; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9208; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
9209; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
9210; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9211; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
9212;
9213; GFX90A-TGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9214; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
9215; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9216; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9217; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9218; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9219; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9220; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9221; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9222; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9223; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9224; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
9225; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9226; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9227; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9228; GFX90A-TGSPLIT-NEXT:    buffer_invl2
9229; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
9230; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9231; GFX90A-TGSPLIT-NEXT:    s_endpgm
9232;
9233; GFX940-NOTTGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9234; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
9235; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9236; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9237; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9238; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9239; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9240; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9241; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9242; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9243; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9244; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
9245; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9246; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
9247; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9248; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
9249; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9250; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
9251;
9252; GFX940-TGSPLIT-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9253; GFX940-TGSPLIT:       ; %bb.0: ; %entry
9254; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9255; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9256; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9257; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9258; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9259; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9260; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9261; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9262; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9263; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
9264; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9265; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
9266; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9267; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
9268; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9269; GFX940-TGSPLIT-NEXT:    s_endpgm
9270;
9271; GFX11-WGP-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9272; GFX11-WGP:       ; %bb.0: ; %entry
9273; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
9274; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9275; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9276; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9277; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9278; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
9279; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
9280; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9281; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
9282; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9283; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9284; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9285; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
9286; GFX11-WGP-NEXT:    buffer_gl1_inv
9287; GFX11-WGP-NEXT:    buffer_gl0_inv
9288; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9289; GFX11-WGP-NEXT:    s_endpgm
9290;
9291; GFX11-CU-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9292; GFX11-CU:       ; %bb.0: ; %entry
9293; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
9294; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9295; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9296; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9297; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
9298; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
9299; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
9300; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9301; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
9302; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9303; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9304; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9305; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
9306; GFX11-CU-NEXT:    buffer_gl1_inv
9307; GFX11-CU-NEXT:    buffer_gl0_inv
9308; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9309; GFX11-CU-NEXT:    s_endpgm
9310;
9311; GFX12-WGP-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9312; GFX12-WGP:       ; %bb.0: ; %entry
9313; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
9314; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9315; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9316; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9317; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
9318; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
9319; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
9320; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9321; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
9322; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
9323; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9324; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9325; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
9326; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
9327; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
9328; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9329; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9330; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
9331; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
9332; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9333; GFX12-WGP-NEXT:    s_endpgm
9334;
9335; GFX12-CU-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
9336; GFX12-CU:       ; %bb.0: ; %entry
9337; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
9338; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9339; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9340; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9341; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
9342; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
9343; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
9344; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9345; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
9346; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
9347; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9348; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9349; GFX12-CU-NEXT:    s_wait_storecnt 0x0
9350; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
9351; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
9352; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9353; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9354; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
9355; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
9356; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9357; GFX12-CU-NEXT:    s_endpgm
9358    ptr addrspace(1) %out, i32 %in, i32 %old) {
9359entry:
  ; Address of the 5th i32 past %out: index 4 * 4 bytes = byte offset 16,
  ; which shows up as "offset:16" (or an explicit add of 16) in the checks.
9360  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old, swap in %in; orderings are success=release,
  ; failure=seq_cst.
9361  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in release seq_cst
  ; Field 0 of the { i32, i1 } result is the value loaded from memory; using it
  ; is what makes this the "ret" (returning) variant of the atomic.
9362  %val0 = extractvalue { i32, i1 } %val, 0
  ; Plain (non-atomic) store of the loaded value to the start of %out.
9363  store i32 %val0, ptr addrspace(1) %out, align 4
9364  ret void
9365}
9366
9367define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg(
9368; GFX6-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9369; GFX6:       ; %bb.0: ; %entry
9370; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
9371; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9372; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
9373; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
9374; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
9375; GFX6-NEXT:    s_mov_b32 s12, s5
9376; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
9377; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
9378; GFX6-NEXT:    s_mov_b32 s11, -1
9379; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
9380; GFX6-NEXT:    s_mov_b32 s5, s12
9381; GFX6-NEXT:    s_mov_b32 s6, s11
9382; GFX6-NEXT:    s_mov_b32 s7, s10
9383; GFX6-NEXT:    v_mov_b32_e32 v0, s9
9384; GFX6-NEXT:    v_mov_b32_e32 v2, s8
9385; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9386; GFX6-NEXT:    v_mov_b32_e32 v1, v2
9387; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9388; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
9389; GFX6-NEXT:    s_waitcnt vmcnt(0)
9390; GFX6-NEXT:    buffer_wbinvl1
9391; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9392; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9393; GFX6-NEXT:    s_endpgm
9394;
9395; GFX7-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9396; GFX7:       ; %bb.0: ; %entry
9397; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
9398; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9399; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
9400; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
9401; GFX7-NEXT:    s_mov_b64 s[12:13], 16
9402; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
9403; GFX7-NEXT:    s_mov_b32 s6, s4
9404; GFX7-NEXT:    s_mov_b32 s7, s5
9405; GFX7-NEXT:    s_mov_b32 s11, s12
9406; GFX7-NEXT:    s_mov_b32 s10, s13
9407; GFX7-NEXT:    s_add_u32 s6, s6, s11
9408; GFX7-NEXT:    s_addc_u32 s10, s7, s10
9409; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
9410; GFX7-NEXT:    s_mov_b32 s7, s10
9411; GFX7-NEXT:    v_mov_b32_e32 v2, s9
9412; GFX7-NEXT:    v_mov_b32_e32 v0, s8
9413; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9414; GFX7-NEXT:    v_mov_b32_e32 v3, v0
9415; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9416; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9417; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9418; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
9419; GFX7-NEXT:    s_waitcnt vmcnt(0)
9420; GFX7-NEXT:    buffer_wbinvl1_vol
9421; GFX7-NEXT:    v_mov_b32_e32 v0, s4
9422; GFX7-NEXT:    v_mov_b32_e32 v1, s5
9423; GFX7-NEXT:    flat_store_dword v[0:1], v2
9424; GFX7-NEXT:    s_endpgm
9425;
9426; GFX10-WGP-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9427; GFX10-WGP:       ; %bb.0: ; %entry
9428; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
9429; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9430; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
9431; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
9432; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9433; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
9434; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
9435; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9436; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
9437; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9438; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9439; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9440; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
9441; GFX10-WGP-NEXT:    buffer_gl1_inv
9442; GFX10-WGP-NEXT:    buffer_gl0_inv
9443; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
9444; GFX10-WGP-NEXT:    s_endpgm
9445;
9446; GFX10-CU-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9447; GFX10-CU:       ; %bb.0: ; %entry
9448; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
9449; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9450; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
9451; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
9452; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
9453; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
9454; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
9455; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9456; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
9457; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9458; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9459; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9460; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
9461; GFX10-CU-NEXT:    buffer_gl1_inv
9462; GFX10-CU-NEXT:    buffer_gl0_inv
9463; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
9464; GFX10-CU-NEXT:    s_endpgm
9465;
9466; SKIP-CACHE-INV-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9467; SKIP-CACHE-INV:       ; %bb.0: ; %entry
9468; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
9469; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
9470; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
9471; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
9472; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
9473; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
9474; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
9475; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
9476; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
9477; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
9478; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
9479; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
9480; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
9481; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
9482; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
9483; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9484; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
9485; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9486; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
9487; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9488; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9489; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9490; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
9491; SKIP-CACHE-INV-NEXT:    s_endpgm
9492;
9493; GFX90A-NOTTGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9494; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
9495; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9496; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9497; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9498; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9499; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9500; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9501; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9502; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9503; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9504; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
9505; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9506; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9507; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9508; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
9509; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
9510; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9511; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
9512;
9513; GFX90A-TGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9514; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
9515; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9516; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9517; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9518; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9519; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9520; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9521; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9522; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9523; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9524; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
9525; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9526; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9527; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9528; GFX90A-TGSPLIT-NEXT:    buffer_invl2
9529; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
9530; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9531; GFX90A-TGSPLIT-NEXT:    s_endpgm
9532;
9533; GFX940-NOTTGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9534; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
9535; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9536; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9537; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9538; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9539; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9540; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9541; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9542; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9543; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9544; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
9545; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9546; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
9547; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9548; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
9549; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9550; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
9551;
9552; GFX940-TGSPLIT-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9553; GFX940-TGSPLIT:       ; %bb.0: ; %entry
9554; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9555; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9556; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9557; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9558; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9559; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9560; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9561; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9562; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9563; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
9564; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9565; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
9566; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9567; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
9568; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9569; GFX940-TGSPLIT-NEXT:    s_endpgm
9570;
9571; GFX11-WGP-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9572; GFX11-WGP:       ; %bb.0: ; %entry
9573; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
9574; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9575; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9576; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9577; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9578; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
9579; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
9580; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9581; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
9582; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9583; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9584; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9585; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
9586; GFX11-WGP-NEXT:    buffer_gl1_inv
9587; GFX11-WGP-NEXT:    buffer_gl0_inv
9588; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9589; GFX11-WGP-NEXT:    s_endpgm
9590;
9591; GFX11-CU-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9592; GFX11-CU:       ; %bb.0: ; %entry
9593; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
9594; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9595; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9596; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9597; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
9598; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
9599; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
9600; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9601; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
9602; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9603; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9604; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9605; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
9606; GFX11-CU-NEXT:    buffer_gl1_inv
9607; GFX11-CU-NEXT:    buffer_gl0_inv
9608; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9609; GFX11-CU-NEXT:    s_endpgm
9610;
9611; GFX12-WGP-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9612; GFX12-WGP:       ; %bb.0: ; %entry
9613; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
9614; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9615; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9616; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9617; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
9618; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
9619; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
9620; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9621; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
9622; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
9623; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9624; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9625; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
9626; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
9627; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
9628; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9629; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9630; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
9631; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
9632; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9633; GFX12-WGP-NEXT:    s_endpgm
9634;
9635; GFX12-CU-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
9636; GFX12-CU:       ; %bb.0: ; %entry
9637; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
9638; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9639; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9640; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9641; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
9642; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
9643; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
9644; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9645; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
9646; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
9647; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9648; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9649; GFX12-CU-NEXT:    s_wait_storecnt 0x0
9650; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
9651; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
9652; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9653; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9654; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
9655; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
9656; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9657; GFX12-CU-NEXT:    s_endpgm
9658    ptr addrspace(1) %out, i32 %in, i32 %old) {
9659entry:
9660  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
9661  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in acq_rel seq_cst
9662  %val0 = extractvalue { i32, i1 } %val, 0
9663  store i32 %val0, ptr addrspace(1) %out, align 4
9664  ret void
9665}
9666
; Kernel under test: a volatile cmpxchg on global memory (addrspace 1) with
; seq_cst success ordering, seq_cst failure ordering and no syncscope argument.
; The exchange targets i32 element 4 of %out (byte offset 16 in the expected
; instructions below) and the value it returns is then stored to %out itself.
; The assertion comments between the signature lines are autogenerated (see the
; note on the first line of this file); regenerate them rather than editing
; them by hand.
; Reviewer note (unconfirmed): in the expected output of the amdpal /
; skip-cache-invalidations run, the wait on the vector memory counter appears
; both before and after the kill comment. Presumably this is genuine llc
; output - confirm by regenerating the assertions.
9667define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
9668; GFX6-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9669; GFX6:       ; %bb.0: ; %entry
9670; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
9671; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9672; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
9673; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
9674; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
9675; GFX6-NEXT:    s_mov_b32 s12, s5
9676; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
9677; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
9678; GFX6-NEXT:    s_mov_b32 s11, -1
9679; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
9680; GFX6-NEXT:    s_mov_b32 s5, s12
9681; GFX6-NEXT:    s_mov_b32 s6, s11
9682; GFX6-NEXT:    s_mov_b32 s7, s10
9683; GFX6-NEXT:    v_mov_b32_e32 v0, s9
9684; GFX6-NEXT:    v_mov_b32_e32 v2, s8
9685; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9686; GFX6-NEXT:    v_mov_b32_e32 v1, v2
9687; GFX6-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9688; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
9689; GFX6-NEXT:    s_waitcnt vmcnt(0)
9690; GFX6-NEXT:    buffer_wbinvl1
9691; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9692; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9693; GFX6-NEXT:    s_endpgm
9694;
9695; GFX7-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9696; GFX7:       ; %bb.0: ; %entry
9697; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
9698; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
9699; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
9700; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
9701; GFX7-NEXT:    s_mov_b64 s[12:13], 16
9702; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
9703; GFX7-NEXT:    s_mov_b32 s6, s4
9704; GFX7-NEXT:    s_mov_b32 s7, s5
9705; GFX7-NEXT:    s_mov_b32 s11, s12
9706; GFX7-NEXT:    s_mov_b32 s10, s13
9707; GFX7-NEXT:    s_add_u32 s6, s6, s11
9708; GFX7-NEXT:    s_addc_u32 s10, s7, s10
9709; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
9710; GFX7-NEXT:    s_mov_b32 s7, s10
9711; GFX7-NEXT:    v_mov_b32_e32 v2, s9
9712; GFX7-NEXT:    v_mov_b32_e32 v0, s8
9713; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9714; GFX7-NEXT:    v_mov_b32_e32 v3, v0
9715; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9716; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9717; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9718; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
9719; GFX7-NEXT:    s_waitcnt vmcnt(0)
9720; GFX7-NEXT:    buffer_wbinvl1_vol
9721; GFX7-NEXT:    v_mov_b32_e32 v0, s4
9722; GFX7-NEXT:    v_mov_b32_e32 v1, s5
9723; GFX7-NEXT:    flat_store_dword v[0:1], v2
9724; GFX7-NEXT:    s_endpgm
9725;
9726; GFX10-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9727; GFX10-WGP:       ; %bb.0: ; %entry
9728; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
9729; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9730; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
9731; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
9732; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9733; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
9734; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
9735; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9736; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
9737; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9738; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9739; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9740; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
9741; GFX10-WGP-NEXT:    buffer_gl1_inv
9742; GFX10-WGP-NEXT:    buffer_gl0_inv
9743; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
9744; GFX10-WGP-NEXT:    s_endpgm
9745;
9746; GFX10-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9747; GFX10-CU:       ; %bb.0: ; %entry
9748; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
9749; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9750; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
9751; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
9752; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
9753; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
9754; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
9755; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9756; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
9757; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9758; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9759; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
9760; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
9761; GFX10-CU-NEXT:    buffer_gl1_inv
9762; GFX10-CU-NEXT:    buffer_gl0_inv
9763; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
9764; GFX10-CU-NEXT:    s_endpgm
9765;
9766; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9767; SKIP-CACHE-INV:       ; %bb.0: ; %entry
9768; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
9769; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
9770; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
9771; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
9772; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
9773; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
9774; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
9775; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
9776; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
9777; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
9778; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
9779; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
9780; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
9781; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
9782; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
9783; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
9784; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
9785; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9786; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
9787; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9788; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
9789; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
9790; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
9791; SKIP-CACHE-INV-NEXT:    s_endpgm
9792;
9793; GFX90A-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9794; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
9795; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9796; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9797; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9798; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9799; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9800; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9801; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9802; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9803; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9804; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
9805; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9806; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9807; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9808; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
9809; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
9810; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9811; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
9812;
9813; GFX90A-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9814; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
9815; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9816; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
9817; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
9818; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
9819; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9820; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
9821; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
9822; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9823; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9824; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
9825; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9826; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
9827; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9828; GFX90A-TGSPLIT-NEXT:    buffer_invl2
9829; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
9830; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
9831; GFX90A-TGSPLIT-NEXT:    s_endpgm
9832;
9833; GFX940-NOTTGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9834; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
9835; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9836; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9837; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9838; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9839; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9840; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9841; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9842; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9843; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9844; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
9845; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9846; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
9847; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9848; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
9849; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9850; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
9851;
9852; GFX940-TGSPLIT-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9853; GFX940-TGSPLIT:       ; %bb.0: ; %entry
9854; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
9855; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
9856; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
9857; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
9858; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
9859; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
9860; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
9861; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
9862; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
9863; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
9864; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9865; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
9866; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
9867; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
9868; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
9869; GFX940-TGSPLIT-NEXT:    s_endpgm
9870;
9871; GFX11-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9872; GFX11-WGP:       ; %bb.0: ; %entry
9873; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
9874; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9875; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9876; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9877; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
9878; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
9879; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
9880; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9881; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
9882; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9883; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
9884; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9885; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
9886; GFX11-WGP-NEXT:    buffer_gl1_inv
9887; GFX11-WGP-NEXT:    buffer_gl0_inv
9888; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9889; GFX11-WGP-NEXT:    s_endpgm
9890;
9891; GFX11-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9892; GFX11-CU:       ; %bb.0: ; %entry
9893; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
9894; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9895; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9896; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9897; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
9898; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
9899; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
9900; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9901; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
9902; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9903; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
9904; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
9905; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
9906; GFX11-CU-NEXT:    buffer_gl1_inv
9907; GFX11-CU-NEXT:    buffer_gl0_inv
9908; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9909; GFX11-CU-NEXT:    s_endpgm
9910;
9911; GFX12-WGP-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9912; GFX12-WGP:       ; %bb.0: ; %entry
9913; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
9914; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9915; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
9916; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
9917; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
9918; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
9919; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
9920; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9921; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
9922; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
9923; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9924; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9925; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
9926; GFX12-WGP-NEXT:    s_wait_loadcnt_dscnt 0x0
9927; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
9928; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
9929; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
9930; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
9931; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
9932; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
9933; GFX12-WGP-NEXT:    s_endpgm
9934;
9935; GFX12-CU-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
9936; GFX12-CU:       ; %bb.0: ; %entry
9937; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
9938; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
9939; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
9940; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
9941; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
9942; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
9943; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
9944; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
9945; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
9946; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
9947; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9948; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9949; GFX12-CU-NEXT:    s_wait_storecnt 0x0
9950; GFX12-CU-NEXT:    s_wait_loadcnt_dscnt 0x0
9951; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
9952; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
9953; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
9954; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
9955; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
9956; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
9957; GFX12-CU-NEXT:    s_endpgm
9958    ptr addrspace(1) %out, i32 %in, i32 %old) {
9959entry:
  ; i32 index 4 from %out: the cmpxchg below operates 16 bytes past %out.
9960  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old, write %in on a match; both orderings are seq_cst.
9961  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst
  ; Field 0 of the { i32, i1 } result is the i32 value read by the cmpxchg.
9962  %val0 = extractvalue { i32, i1 } %val, 0
  ; Plain (non-atomic) store of that value to the start of %out.
9963  store i32 %val0, ptr addrspace(1) %out, align 4
9964  ret void
9965}
9966
; Kernel under test: an atomic i32 load from global memory (addrspace 1) with
; syncscope("one-as") and unordered ordering; the loaded value is copied to
; %out with a plain store.
; In every expected output below the load itself carries no cache-control
; modifier (compare the monotonic variant of this test, whose loads do).
; The assertion comments between the signature lines are autogenerated (see the
; note on the first line of this file); regenerate them rather than editing
; them by hand.
9967define amdgpu_kernel void @global_system_one_as_unordered_load(
9968; GFX6-LABEL: global_system_one_as_unordered_load:
9969; GFX6:       ; %bb.0: ; %entry
9970; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
9971; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
9972; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
9973; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
9974; GFX6-NEXT:    s_mov_b32 s6, s9
9975; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
9976; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
9977; GFX6-NEXT:    s_mov_b32 s13, -1
9978; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
9979; GFX6-NEXT:    s_mov_b32 s9, s6
9980; GFX6-NEXT:    s_mov_b32 s10, s13
9981; GFX6-NEXT:    s_mov_b32 s11, s12
9982; GFX6-NEXT:    s_mov_b32 s14, s5
9983; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
9984; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
9985; GFX6-NEXT:    s_mov_b32 s5, s14
9986; GFX6-NEXT:    s_mov_b32 s6, s13
9987; GFX6-NEXT:    s_mov_b32 s7, s12
9988; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0
9989; GFX6-NEXT:    s_waitcnt vmcnt(0)
9990; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
9991; GFX6-NEXT:    s_endpgm
9992;
9993; GFX7-LABEL: global_system_one_as_unordered_load:
9994; GFX7:       ; %bb.0: ; %entry
9995; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
9996; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
9997; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
9998; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9999; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10000; GFX7-NEXT:    flat_load_dword v2, v[0:1]
10001; GFX7-NEXT:    v_mov_b32_e32 v0, s4
10002; GFX7-NEXT:    v_mov_b32_e32 v1, s5
10003; GFX7-NEXT:    s_waitcnt vmcnt(0)
10004; GFX7-NEXT:    flat_store_dword v[0:1], v2
10005; GFX7-NEXT:    s_endpgm
10006;
10007; GFX10-WGP-LABEL: global_system_one_as_unordered_load:
10008; GFX10-WGP:       ; %bb.0: ; %entry
10009; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10010; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10011; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10012; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10013; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7]
10014; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
10015; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10016; GFX10-WGP-NEXT:    s_endpgm
10017;
10018; GFX10-CU-LABEL: global_system_one_as_unordered_load:
10019; GFX10-CU:       ; %bb.0: ; %entry
10020; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10021; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10022; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10023; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10024; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7]
10025; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
10026; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10027; GFX10-CU-NEXT:    s_endpgm
10028;
10029; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_load:
10030; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10031; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
10032; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
10033; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
10034; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10035; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
10036; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10037; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
10038; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
10039; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10040; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
10041; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
10042; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
10043; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
10044; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10045; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10046; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
10047; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
10048; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
10049; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0
10050; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10051; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10052; SKIP-CACHE-INV-NEXT:    s_endpgm
10053;
10054; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_unordered_load:
10055; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10056; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10057; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10058; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10059; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10060; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
10061; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10062; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10063; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10064;
10065; GFX90A-TGSPLIT-LABEL: global_system_one_as_unordered_load:
10066; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10067; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10068; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10069; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10070; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10071; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7]
10072; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10073; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10074; GFX90A-TGSPLIT-NEXT:    s_endpgm
10075;
10076; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_unordered_load:
10077; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10078; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10079; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
10080; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10081; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10082; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
10083; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10084; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10085; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10086;
10087; GFX940-TGSPLIT-LABEL: global_system_one_as_unordered_load:
10088; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10089; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10090; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
10091; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10092; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10093; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3]
10094; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10095; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10096; GFX940-TGSPLIT-NEXT:    s_endpgm
10097;
10098; GFX11-WGP-LABEL: global_system_one_as_unordered_load:
10099; GFX11-WGP:       ; %bb.0: ; %entry
10100; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
10101; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10102; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10103; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10104; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
10105; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
10106; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10107; GFX11-WGP-NEXT:    s_endpgm
10108;
10109; GFX11-CU-LABEL: global_system_one_as_unordered_load:
10110; GFX11-CU:       ; %bb.0: ; %entry
10111; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
10112; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10113; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10114; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
10115; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
10116; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
10117; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10118; GFX11-CU-NEXT:    s_endpgm
10119;
10120; GFX12-WGP-LABEL: global_system_one_as_unordered_load:
10121; GFX12-WGP:       ; %bb.0: ; %entry
10122; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
10123; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10124; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10125; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
10126; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3]
10127; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
10128; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10129; GFX12-WGP-NEXT:    s_endpgm
10130;
10131; GFX12-CU-LABEL: global_system_one_as_unordered_load:
10132; GFX12-CU:       ; %bb.0: ; %entry
10133; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
10134; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10135; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10136; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
10137; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3]
10138; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
10139; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10140; GFX12-CU-NEXT:    s_endpgm
10141    ptr addrspace(1) %in, ptr addrspace(1) %out) {
10142entry:
  ; Atomic load with the weakest atomic ordering (unordered), scoped "one-as".
10143  %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") unordered, align 4
  ; Plain (non-atomic) store of the loaded value to %out.
10144  store i32 %val, ptr addrspace(1) %out
10145  ret void
10146}
10147
10148define amdgpu_kernel void @global_system_one_as_monotonic_load(
10149; GFX6-LABEL: global_system_one_as_monotonic_load:
10150; GFX6:       ; %bb.0: ; %entry
10151; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
10152; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
10153; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
10154; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10155; GFX6-NEXT:    s_mov_b32 s6, s9
10156; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
10157; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
10158; GFX6-NEXT:    s_mov_b32 s13, -1
10159; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
10160; GFX6-NEXT:    s_mov_b32 s9, s6
10161; GFX6-NEXT:    s_mov_b32 s10, s13
10162; GFX6-NEXT:    s_mov_b32 s11, s12
10163; GFX6-NEXT:    s_mov_b32 s14, s5
10164; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10165; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10166; GFX6-NEXT:    s_mov_b32 s5, s14
10167; GFX6-NEXT:    s_mov_b32 s6, s13
10168; GFX6-NEXT:    s_mov_b32 s7, s12
10169; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
10170; GFX6-NEXT:    s_waitcnt vmcnt(0)
10171; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10172; GFX6-NEXT:    s_endpgm
10173;
10174; GFX7-LABEL: global_system_one_as_monotonic_load:
10175; GFX7:       ; %bb.0: ; %entry
10176; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10177; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
10178; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10179; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10180; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10181; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
10182; GFX7-NEXT:    v_mov_b32_e32 v0, s4
10183; GFX7-NEXT:    v_mov_b32_e32 v1, s5
10184; GFX7-NEXT:    s_waitcnt vmcnt(0)
10185; GFX7-NEXT:    flat_store_dword v[0:1], v2
10186; GFX7-NEXT:    s_endpgm
10187;
10188; GFX10-WGP-LABEL: global_system_one_as_monotonic_load:
10189; GFX10-WGP:       ; %bb.0: ; %entry
10190; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10191; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10192; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10193; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10194; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
10195; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
10196; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10197; GFX10-WGP-NEXT:    s_endpgm
10198;
10199; GFX10-CU-LABEL: global_system_one_as_monotonic_load:
10200; GFX10-CU:       ; %bb.0: ; %entry
10201; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10202; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10203; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10204; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10205; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
10206; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
10207; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10208; GFX10-CU-NEXT:    s_endpgm
10209;
10210; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_load:
10211; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10212; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
10213; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
10214; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
10215; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10216; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
10217; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10218; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
10219; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
10220; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10221; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
10222; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
10223; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
10224; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
10225; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10226; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10227; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
10228; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
10229; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
10230; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
10231; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10232; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10233; SKIP-CACHE-INV-NEXT:    s_endpgm
10234;
10235; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_load:
10236; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10237; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10238; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10239; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10240; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10241; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
10242; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10243; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10244; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10245;
10246; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_load:
10247; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10248; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10249; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10250; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10251; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10252; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
10253; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10254; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10255; GFX90A-TGSPLIT-NEXT:    s_endpgm
10256;
10257; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_load:
10258; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10259; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10260; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
10261; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10262; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10263; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
10264; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10265; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10266; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10267;
10268; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_load:
10269; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10270; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10271; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
10272; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10273; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10274; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
10275; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10276; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10277; GFX940-TGSPLIT-NEXT:    s_endpgm
10278;
10279; GFX11-WGP-LABEL: global_system_one_as_monotonic_load:
10280; GFX11-WGP:       ; %bb.0: ; %entry
10281; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
10282; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10283; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10284; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10285; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
10286; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
10287; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10288; GFX11-WGP-NEXT:    s_endpgm
10289;
10290; GFX11-CU-LABEL: global_system_one_as_monotonic_load:
10291; GFX11-CU:       ; %bb.0: ; %entry
10292; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
10293; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10294; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10295; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
10296; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
10297; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
10298; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10299; GFX11-CU-NEXT:    s_endpgm
10300;
10301; GFX12-WGP-LABEL: global_system_one_as_monotonic_load:
10302; GFX12-WGP:       ; %bb.0: ; %entry
10303; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
10304; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10305; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10306; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
10307; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
10308; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
10309; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10310; GFX12-WGP-NEXT:    s_endpgm
10311;
10312; GFX12-CU-LABEL: global_system_one_as_monotonic_load:
10313; GFX12-CU:       ; %bb.0: ; %entry
10314; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
10315; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10316; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10317; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
10318; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
10319; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
10320; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10321; GFX12-CU-NEXT:    s_endpgm
10322    ptr addrspace(1) %in, ptr addrspace(1) %out) {
10323entry:
10324  %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") monotonic, align 4
10325  store i32 %val, ptr addrspace(1) %out
10326  ret void
10327}
10328
; Acquire atomic load test: loads an i32 from the global pointer %in
; (addrspace(1)) with syncscope("one-as") and acquire ordering, then writes the
; loaded value to %out with a plain (non-atomic) store.
;
; What the generated assertions below show for this function:
;   * every configuration waits for the load to complete before the store;
;   * between that wait and the store a cache invalidate is expected
;     (buffer_wbinvl1 on gfx600, buffer_wbinvl1_vol on gfx700,
;     buffer_gl1_inv + buffer_gl0_inv on gfx1010 and gfx1100,
;     buffer_invl2 + buffer_wbinvl1_vol on gfx90a, buffer_inv sc0 sc1 on
;     gfx940, global_inv scope:SCOPE_SYS on gfx1200);
;   * the configuration run with -amdgcn-skip-cache-invalidations is the only
;     one with no invalidate after the wait.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
10329define amdgpu_kernel void @global_system_one_as_acquire_load(
10330; GFX6-LABEL: global_system_one_as_acquire_load:
10331; GFX6:       ; %bb.0: ; %entry
10332; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
10333; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
10334; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
10335; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10336; GFX6-NEXT:    s_mov_b32 s6, s9
10337; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
10338; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
10339; GFX6-NEXT:    s_mov_b32 s13, -1
10340; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
10341; GFX6-NEXT:    s_mov_b32 s9, s6
10342; GFX6-NEXT:    s_mov_b32 s10, s13
10343; GFX6-NEXT:    s_mov_b32 s11, s12
10344; GFX6-NEXT:    s_mov_b32 s14, s5
10345; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10346; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10347; GFX6-NEXT:    s_mov_b32 s5, s14
10348; GFX6-NEXT:    s_mov_b32 s6, s13
10349; GFX6-NEXT:    s_mov_b32 s7, s12
10350; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
10351; GFX6-NEXT:    s_waitcnt vmcnt(0)
10352; GFX6-NEXT:    buffer_wbinvl1
10353; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10354; GFX6-NEXT:    s_endpgm
10355;
10356; GFX7-LABEL: global_system_one_as_acquire_load:
10357; GFX7:       ; %bb.0: ; %entry
10358; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10359; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
10360; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10361; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10362; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10363; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
10364; GFX7-NEXT:    s_waitcnt vmcnt(0)
10365; GFX7-NEXT:    buffer_wbinvl1_vol
10366; GFX7-NEXT:    v_mov_b32_e32 v0, s4
10367; GFX7-NEXT:    v_mov_b32_e32 v1, s5
10368; GFX7-NEXT:    flat_store_dword v[0:1], v2
10369; GFX7-NEXT:    s_endpgm
10370;
10371; GFX10-WGP-LABEL: global_system_one_as_acquire_load:
10372; GFX10-WGP:       ; %bb.0: ; %entry
10373; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10374; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10375; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10376; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10377; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
10378; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
10379; GFX10-WGP-NEXT:    buffer_gl1_inv
10380; GFX10-WGP-NEXT:    buffer_gl0_inv
10381; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10382; GFX10-WGP-NEXT:    s_endpgm
10383;
10384; GFX10-CU-LABEL: global_system_one_as_acquire_load:
10385; GFX10-CU:       ; %bb.0: ; %entry
10386; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10387; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10388; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10389; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10390; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
10391; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
10392; GFX10-CU-NEXT:    buffer_gl1_inv
10393; GFX10-CU-NEXT:    buffer_gl0_inv
10394; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10395; GFX10-CU-NEXT:    s_endpgm
10396;
10397; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_load:
10398; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10399; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
10400; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
10401; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
10402; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10403; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
10404; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10405; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
10406; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
10407; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10408; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
10409; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
10410; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
10411; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
10412; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10413; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10414; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
10415; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
10416; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
10417; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
10418; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10419; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10420; SKIP-CACHE-INV-NEXT:    s_endpgm
10421;
10422; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_load:
10423; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10424; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10425; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10426; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10427; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10428; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
10429; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10430; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
10431; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
10432; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10433; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10434;
10435; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_load:
10436; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10437; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10438; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10439; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10440; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10441; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
10442; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10443; GFX90A-TGSPLIT-NEXT:    buffer_invl2
10444; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
10445; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10446; GFX90A-TGSPLIT-NEXT:    s_endpgm
10447;
10448; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_load:
10449; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10450; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10451; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
10452; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10453; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10454; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
10455; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10456; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
10457; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10458; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10459;
10460; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_load:
10461; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10462; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10463; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
10464; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10465; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10466; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
10467; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10468; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
10469; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10470; GFX940-TGSPLIT-NEXT:    s_endpgm
10471;
10472; GFX11-WGP-LABEL: global_system_one_as_acquire_load:
10473; GFX11-WGP:       ; %bb.0: ; %entry
10474; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
10475; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10476; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10477; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10478; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
10479; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
10480; GFX11-WGP-NEXT:    buffer_gl1_inv
10481; GFX11-WGP-NEXT:    buffer_gl0_inv
10482; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10483; GFX11-WGP-NEXT:    s_endpgm
10484;
10485; GFX11-CU-LABEL: global_system_one_as_acquire_load:
10486; GFX11-CU:       ; %bb.0: ; %entry
10487; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
10488; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10489; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10490; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
10491; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
10492; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
10493; GFX11-CU-NEXT:    buffer_gl1_inv
10494; GFX11-CU-NEXT:    buffer_gl0_inv
10495; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10496; GFX11-CU-NEXT:    s_endpgm
10497;
10498; GFX12-WGP-LABEL: global_system_one_as_acquire_load:
10499; GFX12-WGP:       ; %bb.0: ; %entry
10500; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
10501; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10502; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10503; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
10504; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
10505; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
10506; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
10507; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10508; GFX12-WGP-NEXT:    s_endpgm
10509;
10510; GFX12-CU-LABEL: global_system_one_as_acquire_load:
10511; GFX12-CU:       ; %bb.0: ; %entry
10512; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
10513; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10514; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10515; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
10516; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
10517; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
10518; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
10519; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10520; GFX12-CU-NEXT:    s_endpgm
10521    ptr addrspace(1) %in, ptr addrspace(1) %out) {
10522entry:
  ; Operation under test: the acquire atomic load. The store that follows is a
  ; plain store that only consumes the loaded value.
10523  %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") acquire, align 4
10524  store i32 %val, ptr addrspace(1) %out
10525  ret void
10526}
10527
; Sequentially consistent atomic load test: loads an i32 from the global
; pointer %in (addrspace(1)) with syncscope("one-as") and seq_cst ordering,
; then writes the loaded value to %out with a plain (non-atomic) store.
;
; What the generated assertions below show for this function:
;   * a wait is expected immediately BEFORE the load in every configuration
;     (s_waitcnt vmcnt(0) on gfx600/gfx700 and the skip-cache-invalidations
;     configuration; s_waitcnt vmcnt(0) lgkmcnt(0) on gfx90a/gfx940; the same
;     plus s_waitcnt_vscnt null, 0x0 on gfx1010/gfx1100; the
;     s_wait_bvhcnt/samplecnt/loadcnt/storecnt/kmcnt sequence on gfx1200);
;   * after the load there is a wait for its completion, followed by a cache
;     invalidate before the store (buffer_wbinvl1, buffer_wbinvl1_vol,
;     buffer_gl1_inv + buffer_gl0_inv, buffer_invl2 + buffer_wbinvl1_vol,
;     buffer_inv sc0 sc1, or global_inv scope:SCOPE_SYS depending on target);
;   * the configuration run with -amdgcn-skip-cache-invalidations keeps both
;     waits but has no invalidate.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
10528define amdgpu_kernel void @global_system_one_as_seq_cst_load(
10529; GFX6-LABEL: global_system_one_as_seq_cst_load:
10530; GFX6:       ; %bb.0: ; %entry
10531; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
10532; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
10533; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
10534; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10535; GFX6-NEXT:    s_mov_b32 s6, s9
10536; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 killed $sgpr8_sgpr9
10537; GFX6-NEXT:    s_mov_b32 s12, 0x100f000
10538; GFX6-NEXT:    s_mov_b32 s13, -1
10539; GFX6-NEXT:    ; kill: def $sgpr8 killed $sgpr8 def $sgpr8_sgpr9_sgpr10_sgpr11
10540; GFX6-NEXT:    s_mov_b32 s9, s6
10541; GFX6-NEXT:    s_mov_b32 s10, s13
10542; GFX6-NEXT:    s_mov_b32 s11, s12
10543; GFX6-NEXT:    s_mov_b32 s14, s5
10544; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10545; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10546; GFX6-NEXT:    s_mov_b32 s5, s14
10547; GFX6-NEXT:    s_mov_b32 s6, s13
10548; GFX6-NEXT:    s_mov_b32 s7, s12
10549; GFX6-NEXT:    s_waitcnt vmcnt(0)
10550; GFX6-NEXT:    buffer_load_dword v0, off, s[8:11], 0 glc
10551; GFX6-NEXT:    s_waitcnt vmcnt(0)
10552; GFX6-NEXT:    buffer_wbinvl1
10553; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10554; GFX6-NEXT:    s_endpgm
10555;
10556; GFX7-LABEL: global_system_one_as_seq_cst_load:
10557; GFX7:       ; %bb.0: ; %entry
10558; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10559; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x2
10560; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10561; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10562; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10563; GFX7-NEXT:    s_waitcnt vmcnt(0)
10564; GFX7-NEXT:    flat_load_dword v2, v[0:1] glc
10565; GFX7-NEXT:    s_waitcnt vmcnt(0)
10566; GFX7-NEXT:    buffer_wbinvl1_vol
10567; GFX7-NEXT:    v_mov_b32_e32 v0, s4
10568; GFX7-NEXT:    v_mov_b32_e32 v1, s5
10569; GFX7-NEXT:    flat_store_dword v[0:1], v2
10570; GFX7-NEXT:    s_endpgm
10571;
10572; GFX10-WGP-LABEL: global_system_one_as_seq_cst_load:
10573; GFX10-WGP:       ; %bb.0: ; %entry
10574; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10575; GFX10-WGP-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10576; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10577; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10578; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10579; GFX10-WGP-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
10580; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
10581; GFX10-WGP-NEXT:    buffer_gl1_inv
10582; GFX10-WGP-NEXT:    buffer_gl0_inv
10583; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10584; GFX10-WGP-NEXT:    s_endpgm
10585;
10586; GFX10-CU-LABEL: global_system_one_as_seq_cst_load:
10587; GFX10-CU:       ; %bb.0: ; %entry
10588; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10589; GFX10-CU-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10590; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10591; GFX10-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10592; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10593; GFX10-CU-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
10594; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
10595; GFX10-CU-NEXT:    buffer_gl1_inv
10596; GFX10-CU-NEXT:    buffer_gl0_inv
10597; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10598; GFX10-CU-NEXT:    s_endpgm
10599;
10600; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_load:
10601; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10602; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
10603; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
10604; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
10605; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10606; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s5
10607; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10608; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, 0xf000
10609; SKIP-CACHE-INV-NEXT:    s_mov_b32 s9, -1
10610; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10611; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, s2
10612; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, s9
10613; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s8
10614; SKIP-CACHE-INV-NEXT:    s_mov_b32 s10, s1
10615; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10616; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10617; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s10
10618; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s9
10619; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s8
10620; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10621; SKIP-CACHE-INV-NEXT:    buffer_load_dword v0, off, s[4:7], 0 glc
10622; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
10623; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10624; SKIP-CACHE-INV-NEXT:    s_endpgm
10625;
10626; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_load:
10627; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10628; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10629; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10630; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10631; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10632; GFX90A-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
10633; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10634; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
10635; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
10636; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10637; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10638;
10639; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_load:
10640; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10641; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10642; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
10643; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10644; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10645; GFX90A-TGSPLIT-NEXT:    global_load_dword v1, v0, s[6:7] glc
10646; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10647; GFX90A-TGSPLIT-NEXT:    buffer_invl2
10648; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
10649; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10650; GFX90A-TGSPLIT-NEXT:    s_endpgm
10651;
10652; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_load:
10653; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10654; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10655; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
10656; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10657; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10658; GFX940-NOTTGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
10659; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10660; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
10661; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10662; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10663;
10664; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_load:
10665; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10666; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10667; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
10668; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10669; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10670; GFX940-TGSPLIT-NEXT:    global_load_dword v1, v0, s[2:3] sc0 sc1
10671; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
10672; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
10673; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10674; GFX940-TGSPLIT-NEXT:    s_endpgm
10675;
10676; GFX11-WGP-LABEL: global_system_one_as_seq_cst_load:
10677; GFX11-WGP:       ; %bb.0: ; %entry
10678; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
10679; GFX11-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10680; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10681; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10682; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
10683; GFX11-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] glc
10684; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
10685; GFX11-WGP-NEXT:    buffer_gl1_inv
10686; GFX11-WGP-NEXT:    buffer_gl0_inv
10687; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10688; GFX11-WGP-NEXT:    s_endpgm
10689;
10690; GFX11-CU-LABEL: global_system_one_as_seq_cst_load:
10691; GFX11-CU:       ; %bb.0: ; %entry
10692; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
10693; GFX11-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10694; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10695; GFX11-CU-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
10696; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
10697; GFX11-CU-NEXT:    global_load_b32 v1, v0, s[2:3] glc
10698; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
10699; GFX11-CU-NEXT:    buffer_gl1_inv
10700; GFX11-CU-NEXT:    buffer_gl0_inv
10701; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10702; GFX11-CU-NEXT:    s_endpgm
10703;
10704; GFX12-WGP-LABEL: global_system_one_as_seq_cst_load:
10705; GFX12-WGP:       ; %bb.0: ; %entry
10706; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
10707; GFX12-WGP-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10708; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10709; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
10710; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
10711; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
10712; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
10713; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
10714; GFX12-WGP-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
10715; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
10716; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
10717; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
10718; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
10719; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10720; GFX12-WGP-NEXT:    s_endpgm
10721;
10722; GFX12-CU-LABEL: global_system_one_as_seq_cst_load:
10723; GFX12-CU:       ; %bb.0: ; %entry
10724; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
10725; GFX12-CU-NEXT:    s_load_b64 s[2:3], s[4:5], 0x0
10726; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10727; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
10728; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
10729; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
10730; GFX12-CU-NEXT:    s_wait_storecnt 0x0
10731; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
10732; GFX12-CU-NEXT:    global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS
10733; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
10734; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
10735; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
10736; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
10737; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10738; GFX12-CU-NEXT:    s_endpgm
10739    ptr addrspace(1) %in, ptr addrspace(1) %out) {
10740entry:
  ; Operation under test: the seq_cst atomic load. The store that follows is a
  ; plain store that only consumes the loaded value.
10741  %val = load atomic i32, ptr addrspace(1) %in syncscope("one-as") seq_cst, align 4
10742  store i32 %val, ptr addrspace(1) %out
10743  ret void
10744}
10745
; Unordered atomic store test: stores the i32 kernel argument %in to the
; global pointer %out (addrspace(1)) with syncscope("one-as") and unordered
; ordering. There is no load and no other memory operation in the kernel.
;
; What the generated assertions below show for this function:
;   * the store is emitted as an ordinary buffer/flat/global store; the only
;     wait in each sequence is the one that follows the scalar argument loads;
;   * no cache invalidate or write-back instruction appears in any
;     configuration;
;   * on gfx940 the store carries the sc0 sc1 modifiers; on the other targets
;     the store has no cache-policy modifier.
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
10746define amdgpu_kernel void @global_system_one_as_unordered_store(
10747; GFX6-LABEL: global_system_one_as_unordered_store:
10748; GFX6:       ; %bb.0: ; %entry
10749; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
10750; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
10751; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
10752; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10753; GFX6-NEXT:    s_mov_b32 s11, s5
10754; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10755; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
10756; GFX6-NEXT:    s_mov_b32 s10, -1
10757; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10758; GFX6-NEXT:    s_mov_b32 s5, s11
10759; GFX6-NEXT:    s_mov_b32 s6, s10
10760; GFX6-NEXT:    s_mov_b32 s7, s9
10761; GFX6-NEXT:    v_mov_b32_e32 v0, s8
10762; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10763; GFX6-NEXT:    s_endpgm
10764;
10765; GFX7-LABEL: global_system_one_as_unordered_store:
10766; GFX7:       ; %bb.0: ; %entry
10767; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
10768; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
10769; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10770; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10771; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10772; GFX7-NEXT:    v_mov_b32_e32 v2, s4
10773; GFX7-NEXT:    flat_store_dword v[0:1], v2
10774; GFX7-NEXT:    s_endpgm
10775;
10776; GFX10-WGP-LABEL: global_system_one_as_unordered_store:
10777; GFX10-WGP:       ; %bb.0: ; %entry
10778; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
10779; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10780; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10781; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10782; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
10783; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10784; GFX10-WGP-NEXT:    s_endpgm
10785;
10786; GFX10-CU-LABEL: global_system_one_as_unordered_store:
10787; GFX10-CU:       ; %bb.0: ; %entry
10788; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
10789; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10790; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10791; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10792; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
10793; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10794; GFX10-CU-NEXT:    s_endpgm
10795;
10796; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_store:
10797; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10798; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
10799; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
10800; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
10801; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10802; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
10803; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10804; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
10805; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
10806; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10807; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
10808; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
10809; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
10810; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
10811; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10812; SKIP-CACHE-INV-NEXT:    s_endpgm
10813;
10814; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_unordered_store:
10815; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10816; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
10817; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10818; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10819; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10820; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10821; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10822; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10823;
10824; GFX90A-TGSPLIT-LABEL: global_system_one_as_unordered_store:
10825; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10826; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
10827; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10828; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10829; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10830; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10831; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10832; GFX90A-TGSPLIT-NEXT:    s_endpgm
10833;
10834; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_unordered_store:
10835; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10836; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
10837; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10838; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10839; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10840; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10841; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10842; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10843;
10844; GFX940-TGSPLIT-LABEL: global_system_one_as_unordered_store:
10845; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10846; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
10847; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10848; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10849; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10850; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10851; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10852; GFX940-TGSPLIT-NEXT:    s_endpgm
10853;
10854; GFX11-WGP-LABEL: global_system_one_as_unordered_store:
10855; GFX11-WGP:       ; %bb.0: ; %entry
10856; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
10857; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10858; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
10859; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10860; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
10861; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10862; GFX11-WGP-NEXT:    s_endpgm
10863;
10864; GFX11-CU-LABEL: global_system_one_as_unordered_store:
10865; GFX11-CU:       ; %bb.0: ; %entry
10866; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
10867; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10868; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
10869; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
10870; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
10871; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10872; GFX11-CU-NEXT:    s_endpgm
10873;
10874; GFX12-WGP-LABEL: global_system_one_as_unordered_store:
10875; GFX12-WGP:       ; %bb.0: ; %entry
10876; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
10877; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10878; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
10879; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
10880; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
10881; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
10882; GFX12-WGP-NEXT:    s_endpgm
10883;
10884; GFX12-CU-LABEL: global_system_one_as_unordered_store:
10885; GFX12-CU:       ; %bb.0: ; %entry
10886; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
10887; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
10888; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
10889; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
10890; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
10891; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
10892; GFX12-CU-NEXT:    s_endpgm
10893    i32 %in, ptr addrspace(1) %out) {
10894entry:
  ; Operation under test: the unordered atomic store of the scalar argument.
10895  store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") unordered, align 4
10896  ret void
10897}
10898
; Test case: atomic i32 store of %in through %out (ptr addrspace(1)) using
; syncscope("one-as") with monotonic ordering.
; The assertion lines between the define line and the parameter list pin the
; expected llc -O0 output for each check prefix used by the llc invocations at
; the top of the file. In these expectations the only wait ahead of the store
; is the lgkmcnt/kmcnt wait that follows the s_load instructions; no
; vmcnt/vscnt wait and no write-back instruction is expected. The GFX940-*
; prefixes expect "sc0 sc1" on the store and the GFX12-* prefixes expect
; "scope:SCOPE_SYS" on it.
; NOTE(review): the assertions are autogenerated (see the note on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
10899define amdgpu_kernel void @global_system_one_as_monotonic_store(
10900; GFX6-LABEL: global_system_one_as_monotonic_store:
10901; GFX6:       ; %bb.0: ; %entry
10902; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
10903; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
10904; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
10905; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
10906; GFX6-NEXT:    s_mov_b32 s11, s5
10907; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
10908; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
10909; GFX6-NEXT:    s_mov_b32 s10, -1
10910; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
10911; GFX6-NEXT:    s_mov_b32 s5, s11
10912; GFX6-NEXT:    s_mov_b32 s6, s10
10913; GFX6-NEXT:    s_mov_b32 s7, s9
10914; GFX6-NEXT:    v_mov_b32_e32 v0, s8
10915; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
10916; GFX6-NEXT:    s_endpgm
10917;
10918; GFX7-LABEL: global_system_one_as_monotonic_store:
10919; GFX7:       ; %bb.0: ; %entry
10920; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
10921; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
10922; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
10923; GFX7-NEXT:    v_mov_b32_e32 v0, s6
10924; GFX7-NEXT:    v_mov_b32_e32 v1, s7
10925; GFX7-NEXT:    v_mov_b32_e32 v2, s4
10926; GFX7-NEXT:    flat_store_dword v[0:1], v2
10927; GFX7-NEXT:    s_endpgm
10928;
10929; GFX10-WGP-LABEL: global_system_one_as_monotonic_store:
10930; GFX10-WGP:       ; %bb.0: ; %entry
10931; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
10932; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10933; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
10934; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
10935; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
10936; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
10937; GFX10-WGP-NEXT:    s_endpgm
10938;
10939; GFX10-CU-LABEL: global_system_one_as_monotonic_store:
10940; GFX10-CU:       ; %bb.0: ; %entry
10941; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
10942; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10943; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
10944; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
10945; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
10946; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
10947; GFX10-CU-NEXT:    s_endpgm
10948;
10949; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_store:
10950; SKIP-CACHE-INV:       ; %bb.0: ; %entry
10951; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
10952; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
10953; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
10954; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
10955; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
10956; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
10957; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
10958; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
10959; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
10960; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
10961; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
10962; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
10963; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
10964; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
10965; SKIP-CACHE-INV-NEXT:    s_endpgm
10966;
10967; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_store:
10968; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
10969; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
10970; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10971; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10972; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10973; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10974; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10975; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
10976;
10977; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_store:
10978; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
10979; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
10980; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
10981; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10982; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10983; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
10984; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
10985; GFX90A-TGSPLIT-NEXT:    s_endpgm
10986;
10987; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_store:
10988; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
10989; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
10990; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
10991; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
10992; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
10993; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
10994; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
10995; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
10996;
10997; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_store:
10998; GFX940-TGSPLIT:       ; %bb.0: ; %entry
10999; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
11000; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11001; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11002; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11003; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11004; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11005; GFX940-TGSPLIT-NEXT:    s_endpgm
11006;
11007; GFX11-WGP-LABEL: global_system_one_as_monotonic_store:
11008; GFX11-WGP:       ; %bb.0: ; %entry
11009; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
11010; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11011; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11012; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11013; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
11014; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11015; GFX11-WGP-NEXT:    s_endpgm
11016;
11017; GFX11-CU-LABEL: global_system_one_as_monotonic_store:
11018; GFX11-CU:       ; %bb.0: ; %entry
11019; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
11020; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11021; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11022; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11023; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
11024; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11025; GFX11-CU-NEXT:    s_endpgm
11026;
11027; GFX12-WGP-LABEL: global_system_one_as_monotonic_store:
11028; GFX12-WGP:       ; %bb.0: ; %entry
11029; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
11030; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11031; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11032; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11033; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
11034; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11035; GFX12-WGP-NEXT:    s_endpgm
11036;
11037; GFX12-CU-LABEL: global_system_one_as_monotonic_store:
11038; GFX12-CU:       ; %bb.0: ; %entry
11039; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
11040; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11041; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11042; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11043; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
11044; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11045; GFX12-CU-NEXT:    s_endpgm
11046    i32 %in, ptr addrspace(1) %out) {
11047entry:
11048  store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") monotonic, align 4
11049  ret void
11050}
11051
; Test case: atomic i32 store of %in through %out (ptr addrspace(1)) using
; syncscope("one-as") with release ordering.
; The assertion lines between the define line and the parameter list pin the
; expected llc -O0 output for each check prefix. What they expect immediately
; ahead of the store instruction:
;   - GFX6, GFX7 and SKIP-CACHE-INV prefixes: s_waitcnt vmcnt(0)
;   - GFX10-* and GFX11-* prefixes: s_waitcnt vmcnt(0), then
;     s_waitcnt_vscnt null, 0x0
;   - GFX90A-* prefixes: buffer_wbl2, then s_waitcnt vmcnt(0)
;   - GFX940-* prefixes: buffer_wbl2 sc0 sc1, then s_waitcnt vmcnt(0); the
;     store itself carries "sc0 sc1"
;   - GFX12-* prefixes: global_wb scope:SCOPE_SYS, then s_wait_bvhcnt,
;     s_wait_samplecnt, s_wait_loadcnt and s_wait_storecnt (all 0x0); the
;     store itself carries "scope:SCOPE_SYS"
; NOTE(review): the assertions are autogenerated (see the note on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
11052define amdgpu_kernel void @global_system_one_as_release_store(
11053; GFX6-LABEL: global_system_one_as_release_store:
11054; GFX6:       ; %bb.0: ; %entry
11055; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
11056; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
11057; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
11058; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11059; GFX6-NEXT:    s_mov_b32 s11, s5
11060; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11061; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
11062; GFX6-NEXT:    s_mov_b32 s10, -1
11063; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11064; GFX6-NEXT:    s_mov_b32 s5, s11
11065; GFX6-NEXT:    s_mov_b32 s6, s10
11066; GFX6-NEXT:    s_mov_b32 s7, s9
11067; GFX6-NEXT:    v_mov_b32_e32 v0, s8
11068; GFX6-NEXT:    s_waitcnt vmcnt(0)
11069; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
11070; GFX6-NEXT:    s_endpgm
11071;
11072; GFX7-LABEL: global_system_one_as_release_store:
11073; GFX7:       ; %bb.0: ; %entry
11074; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
11075; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
11076; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11077; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11078; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11079; GFX7-NEXT:    v_mov_b32_e32 v2, s4
11080; GFX7-NEXT:    s_waitcnt vmcnt(0)
11081; GFX7-NEXT:    flat_store_dword v[0:1], v2
11082; GFX7-NEXT:    s_endpgm
11083;
11084; GFX10-WGP-LABEL: global_system_one_as_release_store:
11085; GFX10-WGP:       ; %bb.0: ; %entry
11086; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
11087; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11088; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11089; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11090; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
11091; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
11092; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11093; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
11094; GFX10-WGP-NEXT:    s_endpgm
11095;
11096; GFX10-CU-LABEL: global_system_one_as_release_store:
11097; GFX10-CU:       ; %bb.0: ; %entry
11098; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
11099; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11100; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11101; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11102; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
11103; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
11104; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11105; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
11106; GFX10-CU-NEXT:    s_endpgm
11107;
11108; SKIP-CACHE-INV-LABEL: global_system_one_as_release_store:
11109; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11110; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
11111; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
11112; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
11113; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11114; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
11115; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11116; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
11117; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
11118; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11119; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
11120; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
11121; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
11122; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
11123; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11124; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
11125; SKIP-CACHE-INV-NEXT:    s_endpgm
11126;
11127; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_store:
11128; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11129; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
11130; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11131; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11132; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11133; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11134; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
11135; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11136; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11137; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11138;
11139; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_store:
11140; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11141; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
11142; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11143; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11144; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11145; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11146; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
11147; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11148; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11149; GFX90A-TGSPLIT-NEXT:    s_endpgm
11150;
11151; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_store:
11152; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11153; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
11154; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11155; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11156; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11157; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11158; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
11159; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11160; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11161; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11162;
11163; GFX940-TGSPLIT-LABEL: global_system_one_as_release_store:
11164; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11165; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
11166; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11167; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11168; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11169; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11170; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
11171; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11172; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11173; GFX940-TGSPLIT-NEXT:    s_endpgm
11174;
11175; GFX11-WGP-LABEL: global_system_one_as_release_store:
11176; GFX11-WGP:       ; %bb.0: ; %entry
11177; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
11178; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11179; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11180; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11181; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
11182; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
11183; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11184; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11185; GFX11-WGP-NEXT:    s_endpgm
11186;
11187; GFX11-CU-LABEL: global_system_one_as_release_store:
11188; GFX11-CU:       ; %bb.0: ; %entry
11189; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
11190; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11191; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11192; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11193; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
11194; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
11195; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11196; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11197; GFX11-CU-NEXT:    s_endpgm
11198;
11199; GFX12-WGP-LABEL: global_system_one_as_release_store:
11200; GFX12-WGP:       ; %bb.0: ; %entry
11201; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
11202; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11203; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11204; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11205; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
11206; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
11207; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
11208; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
11209; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11210; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
11211; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11212; GFX12-WGP-NEXT:    s_endpgm
11213;
11214; GFX12-CU-LABEL: global_system_one_as_release_store:
11215; GFX12-CU:       ; %bb.0: ; %entry
11216; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
11217; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11218; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11219; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11220; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
11221; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
11222; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
11223; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
11224; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11225; GFX12-CU-NEXT:    s_wait_storecnt 0x0
11226; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11227; GFX12-CU-NEXT:    s_endpgm
11228    i32 %in, ptr addrspace(1) %out) {
11229entry:
11230  store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") release, align 4
11231  ret void
11232}
11233
; Test case: atomic i32 store of %in through %out (ptr addrspace(1)) using
; syncscope("one-as") with seq_cst ordering.
; The assertion lines between the define line and the parameter list pin the
; expected llc -O0 output for each check prefix. What they expect immediately
; ahead of the store instruction:
;   - GFX6, GFX7 and SKIP-CACHE-INV prefixes: s_waitcnt vmcnt(0)
;   - GFX10-* and GFX11-* prefixes: s_waitcnt vmcnt(0), then
;     s_waitcnt_vscnt null, 0x0
;   - GFX90A-* prefixes: buffer_wbl2, then s_waitcnt vmcnt(0)
;   - GFX940-* prefixes: buffer_wbl2 sc0 sc1, then s_waitcnt vmcnt(0); the
;     store itself carries "sc0 sc1"
;   - GFX12-* prefixes: global_wb scope:SCOPE_SYS, then s_wait_bvhcnt,
;     s_wait_samplecnt, s_wait_loadcnt and s_wait_storecnt (all 0x0); the
;     store itself carries "scope:SCOPE_SYS"
; No instruction other than s_endpgm is expected after the store.
; NOTE(review): the assertions are autogenerated (see the note on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
11234define amdgpu_kernel void @global_system_one_as_seq_cst_store(
11235; GFX6-LABEL: global_system_one_as_seq_cst_store:
11236; GFX6:       ; %bb.0: ; %entry
11237; GFX6-NEXT:    s_mov_b64 s[4:5], s[8:9]
11238; GFX6-NEXT:    s_load_dword s8, s[4:5], 0x0
11239; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x2
11240; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11241; GFX6-NEXT:    s_mov_b32 s11, s5
11242; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11243; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
11244; GFX6-NEXT:    s_mov_b32 s10, -1
11245; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11246; GFX6-NEXT:    s_mov_b32 s5, s11
11247; GFX6-NEXT:    s_mov_b32 s6, s10
11248; GFX6-NEXT:    s_mov_b32 s7, s9
11249; GFX6-NEXT:    v_mov_b32_e32 v0, s8
11250; GFX6-NEXT:    s_waitcnt vmcnt(0)
11251; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
11252; GFX6-NEXT:    s_endpgm
11253;
11254; GFX7-LABEL: global_system_one_as_seq_cst_store:
11255; GFX7:       ; %bb.0: ; %entry
11256; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x0
11257; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x2
11258; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11259; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11260; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11261; GFX7-NEXT:    v_mov_b32_e32 v2, s4
11262; GFX7-NEXT:    s_waitcnt vmcnt(0)
11263; GFX7-NEXT:    flat_store_dword v[0:1], v2
11264; GFX7-NEXT:    s_endpgm
11265;
11266; GFX10-WGP-LABEL: global_system_one_as_seq_cst_store:
11267; GFX10-WGP:       ; %bb.0: ; %entry
11268; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x0
11269; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11270; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11271; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11272; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
11273; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
11274; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11275; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
11276; GFX10-WGP-NEXT:    s_endpgm
11277;
11278; GFX10-CU-LABEL: global_system_one_as_seq_cst_store:
11279; GFX10-CU:       ; %bb.0: ; %entry
11280; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x0
11281; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11282; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11283; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11284; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
11285; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
11286; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11287; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
11288; GFX10-CU-NEXT:    s_endpgm
11289;
11290; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_store:
11291; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11292; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[0:1], s[4:5]
11293; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
11294; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
11295; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11296; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
11297; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11298; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
11299; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
11300; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11301; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
11302; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
11303; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
11304; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
11305; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11306; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
11307; SKIP-CACHE-INV-NEXT:    s_endpgm
11308;
11309; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_store:
11310; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11311; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
11312; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11313; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11314; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11315; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11316; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
11317; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11318; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11319; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11320;
11321; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_store:
11322; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11323; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x0
11324; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x8
11325; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11326; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11327; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11328; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
11329; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11330; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
11331; GFX90A-TGSPLIT-NEXT:    s_endpgm
11332;
11333; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_store:
11334; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11335; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
11336; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11337; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11338; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11339; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11340; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
11341; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11342; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11343; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11344;
11345; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_store:
11346; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11347; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x0
11348; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
11349; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11350; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11351; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11352; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
11353; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11354; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
11355; GFX940-TGSPLIT-NEXT:    s_endpgm
11356;
11357; GFX11-WGP-LABEL: global_system_one_as_seq_cst_store:
11358; GFX11-WGP:       ; %bb.0: ; %entry
11359; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
11360; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11361; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11362; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11363; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
11364; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
11365; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11366; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
11367; GFX11-WGP-NEXT:    s_endpgm
11368;
11369; GFX11-CU-LABEL: global_system_one_as_seq_cst_store:
11370; GFX11-CU:       ; %bb.0: ; %entry
11371; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
11372; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11373; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11374; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11375; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
11376; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
11377; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11378; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
11379; GFX11-CU-NEXT:    s_endpgm
11380;
11381; GFX12-WGP-LABEL: global_system_one_as_seq_cst_store:
11382; GFX12-WGP:       ; %bb.0: ; %entry
11383; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x0
11384; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11385; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11386; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11387; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
11388; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
11389; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
11390; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
11391; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11392; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
11393; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11394; GFX12-WGP-NEXT:    s_endpgm
11395;
11396; GFX12-CU-LABEL: global_system_one_as_seq_cst_store:
11397; GFX12-CU:       ; %bb.0: ; %entry
11398; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x0
11399; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x8
11400; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11401; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11402; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
11403; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
11404; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
11405; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
11406; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11407; GFX12-CU-NEXT:    s_wait_storecnt 0x0
11408; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11409; GFX12-CU-NEXT:    s_endpgm
11410    i32 %in, ptr addrspace(1) %out) {
11411entry:
11412  store atomic i32 %in, ptr addrspace(1) %out syncscope("one-as") seq_cst, align 4
11413  ret void
11414}
11415
; Test case: volatile atomicrmw xchg of i32 %in into %out (ptr addrspace(1))
; using syncscope("one-as") with monotonic ordering. Note the parameter order
; here is (%out, %in).
; The assertion lines between the define line and the parameter list pin the
; expected llc -O0 output for each check prefix. In these expectations the
; atomic swap instruction is followed directly by s_endpgm, and the only wait
; ahead of it is the lgkmcnt/kmcnt wait that follows the s_load instructions.
; The GFX940-* prefixes expect "sc1" on the swap and the GFX12-* prefixes
; expect "scope:SCOPE_SYS" on it.
; NOTE(review): the assertions are autogenerated (see the note on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
11416define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw(
11417; GFX6-LABEL: global_system_one_as_monotonic_atomicrmw:
11418; GFX6:       ; %bb.0: ; %entry
11419; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11420; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
11421; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11422; GFX6-NEXT:    s_mov_b32 s11, s5
11423; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11424; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
11425; GFX6-NEXT:    s_mov_b32 s10, -1
11426; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11427; GFX6-NEXT:    s_mov_b32 s5, s11
11428; GFX6-NEXT:    s_mov_b32 s6, s10
11429; GFX6-NEXT:    s_mov_b32 s7, s9
11430; GFX6-NEXT:    v_mov_b32_e32 v0, s8
11431; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
11432; GFX6-NEXT:    s_endpgm
11433;
11434; GFX7-LABEL: global_system_one_as_monotonic_atomicrmw:
11435; GFX7:       ; %bb.0: ; %entry
11436; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11437; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
11438; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11439; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11440; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11441; GFX7-NEXT:    v_mov_b32_e32 v2, s4
11442; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
11443; GFX7-NEXT:    s_endpgm
11444;
11445; GFX10-WGP-LABEL: global_system_one_as_monotonic_atomicrmw:
11446; GFX10-WGP:       ; %bb.0: ; %entry
11447; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11448; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11449; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
11450; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11451; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
11452; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
11453; GFX10-WGP-NEXT:    s_endpgm
11454;
11455; GFX10-CU-LABEL: global_system_one_as_monotonic_atomicrmw:
11456; GFX10-CU:       ; %bb.0: ; %entry
11457; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11458; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11459; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
11460; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11461; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
11462; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
11463; GFX10-CU-NEXT:    s_endpgm
11464;
11465; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_atomicrmw:
11466; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11467; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11468; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
11469; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11470; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
11471; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11472; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
11473; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
11474; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11475; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
11476; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
11477; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
11478; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
11479; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
11480; SKIP-CACHE-INV-NEXT:    s_endpgm
11481;
11482; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw:
11483; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11484; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11485; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11486; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
11487; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11488; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11489; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
11490; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11491;
11492; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw:
11493; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11494; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11495; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11496; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
11497; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11498; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11499; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
11500; GFX90A-TGSPLIT-NEXT:    s_endpgm
11501;
11502; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw:
11503; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11504; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11505; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11506; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
11507; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11508; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11509; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
11510; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11511;
11512; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_atomicrmw:
11513; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11514; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11515; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11516; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
11517; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11518; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11519; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
11520; GFX940-TGSPLIT-NEXT:    s_endpgm
11521;
11522; GFX11-WGP-LABEL: global_system_one_as_monotonic_atomicrmw:
11523; GFX11-WGP:       ; %bb.0: ; %entry
11524; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11525; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11526; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
11527; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11528; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
11529; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
11530; GFX11-WGP-NEXT:    s_endpgm
11531;
11532; GFX11-CU-LABEL: global_system_one_as_monotonic_atomicrmw:
11533; GFX11-CU:       ; %bb.0: ; %entry
11534; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11535; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11536; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
11537; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11538; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
11539; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
11540; GFX11-CU-NEXT:    s_endpgm
11541;
11542; GFX12-WGP-LABEL: global_system_one_as_monotonic_atomicrmw:
11543; GFX12-WGP:       ; %bb.0: ; %entry
11544; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11545; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11546; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
11547; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11548; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
11549; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11550; GFX12-WGP-NEXT:    s_endpgm
11551;
11552; GFX12-CU-LABEL: global_system_one_as_monotonic_atomicrmw:
11553; GFX12-CU:       ; %bb.0: ; %entry
11554; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11555; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11556; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
11557; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11558; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
11559; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11560; GFX12-CU-NEXT:    s_endpgm
11561    ptr addrspace(1) %out, i32 %in) {
11562entry:
  ; The xchg result %val is not referenced again before the ret.
11563  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") monotonic
11564  ret void
11565}
11566
; Test kernel: performs a volatile `atomicrmw xchg` of %in into the global
; (addrspace(1)) pointer %out with syncscope("one-as") and acquire ordering.
; The exchanged-out value %val is not used afterwards.
;
; In the assertions below, every prefix expects the atomic swap to be followed
; by a wait-count instruction and then by one or two further instructions
; (buffer_wbinvl1, buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv,
; buffer_invl2 + buffer_wbinvl1_vol, buffer_inv sc0 sc1, or
; global_inv scope:SCOPE_SYS) before s_endpgm. The one exception is the
; SKIP-CACHE-INV prefix, whose llc invocation passes
; -amdgcn-skip-cache-invalidations: it expects the wait but nothing between
; the wait and s_endpgm.
;
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
11567define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw(
11568; GFX6-LABEL: global_system_one_as_acquire_atomicrmw:
11569; GFX6:       ; %bb.0: ; %entry
11570; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11571; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
11572; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11573; GFX6-NEXT:    s_mov_b32 s11, s5
11574; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11575; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
11576; GFX6-NEXT:    s_mov_b32 s10, -1
11577; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11578; GFX6-NEXT:    s_mov_b32 s5, s11
11579; GFX6-NEXT:    s_mov_b32 s6, s10
11580; GFX6-NEXT:    s_mov_b32 s7, s9
11581; GFX6-NEXT:    v_mov_b32_e32 v0, s8
11582; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
11583; GFX6-NEXT:    s_waitcnt vmcnt(0)
11584; GFX6-NEXT:    buffer_wbinvl1
11585; GFX6-NEXT:    s_endpgm
11586;
11587; GFX7-LABEL: global_system_one_as_acquire_atomicrmw:
11588; GFX7:       ; %bb.0: ; %entry
11589; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11590; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
11591; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11592; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11593; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11594; GFX7-NEXT:    v_mov_b32_e32 v2, s4
11595; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
11596; GFX7-NEXT:    s_waitcnt vmcnt(0)
11597; GFX7-NEXT:    buffer_wbinvl1_vol
11598; GFX7-NEXT:    s_endpgm
11599;
11600; GFX10-WGP-LABEL: global_system_one_as_acquire_atomicrmw:
11601; GFX10-WGP:       ; %bb.0: ; %entry
11602; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11603; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11604; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
11605; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11606; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
11607; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
11608; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11609; GFX10-WGP-NEXT:    buffer_gl1_inv
11610; GFX10-WGP-NEXT:    buffer_gl0_inv
11611; GFX10-WGP-NEXT:    s_endpgm
11612;
11613; GFX10-CU-LABEL: global_system_one_as_acquire_atomicrmw:
11614; GFX10-CU:       ; %bb.0: ; %entry
11615; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11616; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11617; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
11618; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11619; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
11620; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
11621; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11622; GFX10-CU-NEXT:    buffer_gl1_inv
11623; GFX10-CU-NEXT:    buffer_gl0_inv
11624; GFX10-CU-NEXT:    s_endpgm
11625;
11626; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_atomicrmw:
11627; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11628; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11629; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
11630; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11631; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
11632; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11633; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
11634; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
11635; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11636; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
11637; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
11638; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
11639; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
11640; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
11641; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11642; SKIP-CACHE-INV-NEXT:    s_endpgm
11643;
11644; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw:
11645; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11646; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11647; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11648; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
11649; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11650; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11651; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
11652; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11653; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
11654; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
11655; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11656;
11657; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw:
11658; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11659; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11660; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11661; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
11662; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11663; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11664; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
11665; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11666; GFX90A-TGSPLIT-NEXT:    buffer_invl2
11667; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
11668; GFX90A-TGSPLIT-NEXT:    s_endpgm
11669;
11670; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw:
11671; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11672; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11673; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11674; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
11675; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11676; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11677; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
11678; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11679; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
11680; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11681;
11682; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_atomicrmw:
11683; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11684; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11685; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11686; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
11687; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11688; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11689; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
11690; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11691; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
11692; GFX940-TGSPLIT-NEXT:    s_endpgm
11693;
11694; GFX11-WGP-LABEL: global_system_one_as_acquire_atomicrmw:
11695; GFX11-WGP:       ; %bb.0: ; %entry
11696; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11697; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11698; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
11699; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11700; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
11701; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
11702; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11703; GFX11-WGP-NEXT:    buffer_gl1_inv
11704; GFX11-WGP-NEXT:    buffer_gl0_inv
11705; GFX11-WGP-NEXT:    s_endpgm
11706;
11707; GFX11-CU-LABEL: global_system_one_as_acquire_atomicrmw:
11708; GFX11-CU:       ; %bb.0: ; %entry
11709; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11710; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11711; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
11712; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11713; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
11714; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
11715; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11716; GFX11-CU-NEXT:    buffer_gl1_inv
11717; GFX11-CU-NEXT:    buffer_gl0_inv
11718; GFX11-CU-NEXT:    s_endpgm
11719;
11720; GFX12-WGP-LABEL: global_system_one_as_acquire_atomicrmw:
11721; GFX12-WGP:       ; %bb.0: ; %entry
11722; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11723; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11724; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
11725; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11726; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
11727; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11728; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
11729; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
11730; GFX12-WGP-NEXT:    s_endpgm
11731;
11732; GFX12-CU-LABEL: global_system_one_as_acquire_atomicrmw:
11733; GFX12-CU:       ; %bb.0: ; %entry
11734; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11735; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11736; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
11737; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11738; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
11739; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11740; GFX12-CU-NEXT:    s_wait_storecnt 0x0
11741; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
11742; GFX12-CU-NEXT:    s_endpgm
11743    ptr addrspace(1) %out, i32 %in) {
11744entry:
11745  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire
11746  ret void
11747}
11748
; Test kernel: performs a volatile `atomicrmw xchg` of %in into the global
; (addrspace(1)) pointer %out with syncscope("one-as") and release ordering.
; The exchanged-out value %val is not used afterwards.
;
; In the assertions below, every wait-count instruction is expected before the
; atomic swap, and the swap is followed directly by s_endpgm. Ahead of those
; waits the gfx90a prefixes also expect buffer_wbl2, the gfx940 prefixes
; buffer_wbl2 sc0 sc1, and the gfx12 prefixes global_wb scope:SCOPE_SYS.
;
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
11749define amdgpu_kernel void @global_system_one_as_release_atomicrmw(
11750; GFX6-LABEL: global_system_one_as_release_atomicrmw:
11751; GFX6:       ; %bb.0: ; %entry
11752; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11753; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
11754; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11755; GFX6-NEXT:    s_mov_b32 s11, s5
11756; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11757; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
11758; GFX6-NEXT:    s_mov_b32 s10, -1
11759; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11760; GFX6-NEXT:    s_mov_b32 s5, s11
11761; GFX6-NEXT:    s_mov_b32 s6, s10
11762; GFX6-NEXT:    s_mov_b32 s7, s9
11763; GFX6-NEXT:    v_mov_b32_e32 v0, s8
11764; GFX6-NEXT:    s_waitcnt vmcnt(0)
11765; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
11766; GFX6-NEXT:    s_endpgm
11767;
11768; GFX7-LABEL: global_system_one_as_release_atomicrmw:
11769; GFX7:       ; %bb.0: ; %entry
11770; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11771; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
11772; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11773; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11774; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11775; GFX7-NEXT:    v_mov_b32_e32 v2, s4
11776; GFX7-NEXT:    s_waitcnt vmcnt(0)
11777; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
11778; GFX7-NEXT:    s_endpgm
11779;
11780; GFX10-WGP-LABEL: global_system_one_as_release_atomicrmw:
11781; GFX10-WGP:       ; %bb.0: ; %entry
11782; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11783; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11784; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
11785; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11786; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
11787; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
11788; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11789; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
11790; GFX10-WGP-NEXT:    s_endpgm
11791;
11792; GFX10-CU-LABEL: global_system_one_as_release_atomicrmw:
11793; GFX10-CU:       ; %bb.0: ; %entry
11794; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11795; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11796; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
11797; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11798; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
11799; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
11800; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11801; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
11802; GFX10-CU-NEXT:    s_endpgm
11803;
11804; SKIP-CACHE-INV-LABEL: global_system_one_as_release_atomicrmw:
11805; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11806; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11807; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
11808; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11809; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
11810; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
11811; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
11812; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
11813; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
11814; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
11815; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
11816; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
11817; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
11818; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
11819; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
11820; SKIP-CACHE-INV-NEXT:    s_endpgm
11821;
11822; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_atomicrmw:
11823; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
11824; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11825; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11826; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
11827; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11828; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11829; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
11830; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11831; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
11832; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
11833;
11834; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_atomicrmw:
11835; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
11836; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11837; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11838; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
11839; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11840; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
11841; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
11842; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11843; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
11844; GFX90A-TGSPLIT-NEXT:    s_endpgm
11845;
11846; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_atomicrmw:
11847; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
11848; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11849; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11850; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
11851; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11852; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11853; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
11854; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11855; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
11856; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
11857;
11858; GFX940-TGSPLIT-LABEL: global_system_one_as_release_atomicrmw:
11859; GFX940-TGSPLIT:       ; %bb.0: ; %entry
11860; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
11861; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11862; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
11863; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
11864; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
11865; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
11866; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
11867; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
11868; GFX940-TGSPLIT-NEXT:    s_endpgm
11869;
11870; GFX11-WGP-LABEL: global_system_one_as_release_atomicrmw:
11871; GFX11-WGP:       ; %bb.0: ; %entry
11872; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
11873; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11874; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
11875; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11876; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
11877; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
11878; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11879; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
11880; GFX11-WGP-NEXT:    s_endpgm
11881;
11882; GFX11-CU-LABEL: global_system_one_as_release_atomicrmw:
11883; GFX11-CU:       ; %bb.0: ; %entry
11884; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
11885; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11886; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
11887; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
11888; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
11889; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
11890; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11891; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
11892; GFX11-CU-NEXT:    s_endpgm
11893;
11894; GFX12-WGP-LABEL: global_system_one_as_release_atomicrmw:
11895; GFX12-WGP:       ; %bb.0: ; %entry
11896; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
11897; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11898; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
11899; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
11900; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
11901; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
11902; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
11903; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
11904; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
11905; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
11906; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11907; GFX12-WGP-NEXT:    s_endpgm
11908;
11909; GFX12-CU-LABEL: global_system_one_as_release_atomicrmw:
11910; GFX12-CU:       ; %bb.0: ; %entry
11911; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
11912; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
11913; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
11914; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
11915; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
11916; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
11917; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
11918; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
11919; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
11920; GFX12-CU-NEXT:    s_wait_storecnt 0x0
11921; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
11922; GFX12-CU-NEXT:    s_endpgm
11923    ptr addrspace(1) %out, i32 %in) {
11924entry:
11925  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") release
11926  ret void
11927}
11928
; Test kernel: performs a volatile `atomicrmw xchg` of %in into the global
; (addrspace(1)) pointer %out with syncscope("one-as") and acq_rel ordering.
; The exchanged-out value %val is not used afterwards.
;
; In the assertions below, the atomic swap is bracketed on both sides:
;   * before it, wait-count instructions, preceded on the gfx90a prefixes by
;     buffer_wbl2, on the gfx940 prefixes by buffer_wbl2 sc0 sc1, and on the
;     gfx12 prefixes by global_wb scope:SCOPE_SYS;
;   * after it, another wait-count instruction and then buffer_wbinvl1,
;     buffer_wbinvl1_vol, buffer_gl1_inv + buffer_gl0_inv,
;     buffer_invl2 + buffer_wbinvl1_vol, buffer_inv sc0 sc1, or
;     global_inv scope:SCOPE_SYS, depending on the prefix.
; The SKIP-CACHE-INV prefix, whose llc invocation passes
; -amdgcn-skip-cache-invalidations, keeps the trailing wait but expects
; nothing between that wait and s_endpgm.
;
; The assertions are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
11929define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw(
11930; GFX6-LABEL: global_system_one_as_acq_rel_atomicrmw:
11931; GFX6:       ; %bb.0: ; %entry
11932; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11933; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
11934; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
11935; GFX6-NEXT:    s_mov_b32 s11, s5
11936; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
11937; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
11938; GFX6-NEXT:    s_mov_b32 s10, -1
11939; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
11940; GFX6-NEXT:    s_mov_b32 s5, s11
11941; GFX6-NEXT:    s_mov_b32 s6, s10
11942; GFX6-NEXT:    s_mov_b32 s7, s9
11943; GFX6-NEXT:    v_mov_b32_e32 v0, s8
11944; GFX6-NEXT:    s_waitcnt vmcnt(0)
11945; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
11946; GFX6-NEXT:    s_waitcnt vmcnt(0)
11947; GFX6-NEXT:    buffer_wbinvl1
11948; GFX6-NEXT:    s_endpgm
11949;
11950; GFX7-LABEL: global_system_one_as_acq_rel_atomicrmw:
11951; GFX7:       ; %bb.0: ; %entry
11952; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
11953; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
11954; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
11955; GFX7-NEXT:    v_mov_b32_e32 v0, s6
11956; GFX7-NEXT:    v_mov_b32_e32 v1, s7
11957; GFX7-NEXT:    v_mov_b32_e32 v2, s4
11958; GFX7-NEXT:    s_waitcnt vmcnt(0)
11959; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
11960; GFX7-NEXT:    s_waitcnt vmcnt(0)
11961; GFX7-NEXT:    buffer_wbinvl1_vol
11962; GFX7-NEXT:    s_endpgm
11963;
11964; GFX10-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw:
11965; GFX10-WGP:       ; %bb.0: ; %entry
11966; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
11967; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11968; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
11969; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
11970; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
11971; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
11972; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11973; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
11974; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
11975; GFX10-WGP-NEXT:    buffer_gl1_inv
11976; GFX10-WGP-NEXT:    buffer_gl0_inv
11977; GFX10-WGP-NEXT:    s_endpgm
11978;
11979; GFX10-CU-LABEL: global_system_one_as_acq_rel_atomicrmw:
11980; GFX10-CU:       ; %bb.0: ; %entry
11981; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
11982; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
11983; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
11984; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
11985; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
11986; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
11987; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11988; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
11989; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
11990; GFX10-CU-NEXT:    buffer_gl1_inv
11991; GFX10-CU-NEXT:    buffer_gl0_inv
11992; GFX10-CU-NEXT:    s_endpgm
11993;
11994; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_atomicrmw:
11995; SKIP-CACHE-INV:       ; %bb.0: ; %entry
11996; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
11997; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
11998; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
11999; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12000; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12001; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12002; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12003; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12004; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12005; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12006; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12007; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12008; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12009; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
12010; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12011; SKIP-CACHE-INV-NEXT:    s_endpgm
12012;
12013; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw:
12014; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12015; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12016; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12017; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12018; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12019; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12020; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
12021; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12022; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
12023; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12024; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
12025; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
12026; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12027;
12028; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw:
12029; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12030; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12031; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12032; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12033; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12034; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12035; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
12036; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12037; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
12038; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12039; GFX90A-TGSPLIT-NEXT:    buffer_invl2
12040; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
12041; GFX90A-TGSPLIT-NEXT:    s_endpgm
12042;
12043; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw:
12044; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12045; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12046; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12047; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12048; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12049; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12050; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
12051; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12052; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
12053; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12054; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
12055; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12056;
12057; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_atomicrmw:
12058; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12059; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12060; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12061; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12062; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12063; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12064; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
12065; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12066; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
12067; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12068; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
12069; GFX940-TGSPLIT-NEXT:    s_endpgm
12070;
12071; GFX11-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw:
12072; GFX11-WGP:       ; %bb.0: ; %entry
12073; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12074; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12075; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12076; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12077; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12078; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12079; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12080; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
12081; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12082; GFX11-WGP-NEXT:    buffer_gl1_inv
12083; GFX11-WGP-NEXT:    buffer_gl0_inv
12084; GFX11-WGP-NEXT:    s_endpgm
12085;
12086; GFX11-CU-LABEL: global_system_one_as_acq_rel_atomicrmw:
12087; GFX11-CU:       ; %bb.0: ; %entry
12088; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12089; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12090; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12091; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12092; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12093; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12094; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12095; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
12096; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12097; GFX11-CU-NEXT:    buffer_gl1_inv
12098; GFX11-CU-NEXT:    buffer_gl0_inv
12099; GFX11-CU-NEXT:    s_endpgm
12100;
12101; GFX12-WGP-LABEL: global_system_one_as_acq_rel_atomicrmw:
12102; GFX12-WGP:       ; %bb.0: ; %entry
12103; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12104; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12105; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12106; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12107; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12108; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
12109; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
12110; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
12111; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12112; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12113; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
12114; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12115; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
12116; GFX12-WGP-NEXT:    s_endpgm
12117;
12118; GFX12-CU-LABEL: global_system_one_as_acq_rel_atomicrmw:
12119; GFX12-CU:       ; %bb.0: ; %entry
12120; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12121; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12122; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12123; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12124; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12125; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
12126; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
12127; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
12128; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
12129; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12130; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
12131; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12132; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
12133; GFX12-CU-NEXT:    s_endpgm
12134    ptr addrspace(1) %out, i32 %in) {
12135entry:
12136  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel
12137  ret void
12138}
12139
12140define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw(
12141; GFX6-LABEL: global_system_one_as_seq_cst_atomicrmw:
12142; GFX6:       ; %bb.0: ; %entry
12143; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12144; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
12145; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12146; GFX6-NEXT:    s_mov_b32 s11, s5
12147; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12148; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12149; GFX6-NEXT:    s_mov_b32 s10, -1
12150; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12151; GFX6-NEXT:    s_mov_b32 s5, s11
12152; GFX6-NEXT:    s_mov_b32 s6, s10
12153; GFX6-NEXT:    s_mov_b32 s7, s9
12154; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12155; GFX6-NEXT:    s_waitcnt vmcnt(0)
12156; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0
12157; GFX6-NEXT:    s_waitcnt vmcnt(0)
12158; GFX6-NEXT:    buffer_wbinvl1
12159; GFX6-NEXT:    s_endpgm
12160;
12161; GFX7-LABEL: global_system_one_as_seq_cst_atomicrmw:
12162; GFX7:       ; %bb.0: ; %entry
12163; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x0
12164; GFX7-NEXT:    s_load_dword s4, s[8:9], 0x2
12165; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12166; GFX7-NEXT:    v_mov_b32_e32 v0, s6
12167; GFX7-NEXT:    v_mov_b32_e32 v1, s7
12168; GFX7-NEXT:    v_mov_b32_e32 v2, s4
12169; GFX7-NEXT:    s_waitcnt vmcnt(0)
12170; GFX7-NEXT:    flat_atomic_swap v[0:1], v2
12171; GFX7-NEXT:    s_waitcnt vmcnt(0)
12172; GFX7-NEXT:    buffer_wbinvl1_vol
12173; GFX7-NEXT:    s_endpgm
12174;
12175; GFX10-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw:
12176; GFX10-WGP:       ; %bb.0: ; %entry
12177; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12178; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12179; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
12180; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12181; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12182; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12183; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12184; GFX10-WGP-NEXT:    global_atomic_swap v0, v1, s[4:5]
12185; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12186; GFX10-WGP-NEXT:    buffer_gl1_inv
12187; GFX10-WGP-NEXT:    buffer_gl0_inv
12188; GFX10-WGP-NEXT:    s_endpgm
12189;
12190; GFX10-CU-LABEL: global_system_one_as_seq_cst_atomicrmw:
12191; GFX10-CU:       ; %bb.0: ; %entry
12192; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12193; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12194; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
12195; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12196; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12197; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12198; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12199; GFX10-CU-NEXT:    global_atomic_swap v0, v1, s[4:5]
12200; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12201; GFX10-CU-NEXT:    buffer_gl1_inv
12202; GFX10-CU-NEXT:    buffer_gl0_inv
12203; GFX10-CU-NEXT:    s_endpgm
12204;
12205; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_atomicrmw:
12206; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12207; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12208; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
12209; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12210; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12211; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12212; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12213; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12214; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12215; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12216; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12217; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12218; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12219; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12220; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0
12221; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12222; SKIP-CACHE-INV-NEXT:    s_endpgm
12223;
12224; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw:
12225; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12226; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12227; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12228; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12229; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12230; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12231; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
12232; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12233; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
12234; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12235; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
12236; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
12237; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12238;
12239; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw:
12240; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12241; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12242; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12243; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12244; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12245; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12246; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
12247; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12248; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[4:5]
12249; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12250; GFX90A-TGSPLIT-NEXT:    buffer_invl2
12251; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
12252; GFX90A-TGSPLIT-NEXT:    s_endpgm
12253;
12254; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw:
12255; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12256; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12257; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12258; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12259; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12260; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12261; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
12262; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12263; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
12264; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12265; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
12266; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12267;
12268; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_atomicrmw:
12269; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12270; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12271; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12272; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12273; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12274; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12275; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
12276; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12277; GFX940-TGSPLIT-NEXT:    global_atomic_swap v0, v1, s[0:1] sc1
12278; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12279; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
12280; GFX940-TGSPLIT-NEXT:    s_endpgm
12281;
12282; GFX11-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw:
12283; GFX11-WGP:       ; %bb.0: ; %entry
12284; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12285; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12286; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12287; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12288; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12289; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12290; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12291; GFX11-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
12292; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12293; GFX11-WGP-NEXT:    buffer_gl1_inv
12294; GFX11-WGP-NEXT:    buffer_gl0_inv
12295; GFX11-WGP-NEXT:    s_endpgm
12296;
12297; GFX11-CU-LABEL: global_system_one_as_seq_cst_atomicrmw:
12298; GFX11-CU:       ; %bb.0: ; %entry
12299; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12300; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12301; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12302; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12303; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12304; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12305; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12306; GFX11-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1]
12307; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12308; GFX11-CU-NEXT:    buffer_gl1_inv
12309; GFX11-CU-NEXT:    buffer_gl0_inv
12310; GFX11-CU-NEXT:    s_endpgm
12311;
12312; GFX12-WGP-LABEL: global_system_one_as_seq_cst_atomicrmw:
12313; GFX12-WGP:       ; %bb.0: ; %entry
12314; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12315; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12316; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12317; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12318; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12319; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
12320; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
12321; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
12322; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12323; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12324; GFX12-WGP-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
12325; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12326; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
12327; GFX12-WGP-NEXT:    s_endpgm
12328;
12329; GFX12-CU-LABEL: global_system_one_as_seq_cst_atomicrmw:
12330; GFX12-CU:       ; %bb.0: ; %entry
12331; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12332; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12333; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12334; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12335; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12336; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
12337; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
12338; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
12339; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
12340; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12341; GFX12-CU-NEXT:    global_atomic_swap_b32 v0, v1, s[0:1] scope:SCOPE_SYS
12342; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12343; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
12344; GFX12-CU-NEXT:    s_endpgm
12345    ptr addrspace(1) %out, i32 %in) {
12346entry:
12347  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst
12348  ret void
12349}
12350
; Checks the code emitted for a volatile `atomicrmw xchg` on a global
; (addrspace(1)) pointer with syncscope("one-as") and acquire ordering, where
; the value produced by the atomic is stored back to %out.
; In the expected output every prefix waits on the swap (s_waitcnt vmcnt(0),
; or s_wait_loadcnt 0x0 for GFX12) before the trailing store. Every prefix
; except SKIP-CACHE-INV additionally expects invalidate instruction(s)
; (buffer_wbinvl1[_vol], buffer_gl1_inv/buffer_gl0_inv, buffer_invl2,
; buffer_inv or global_inv) between that wait and the store.
; The CHECK lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them rather than editing by hand.
12351define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw(
12352; GFX6-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12353; GFX6:       ; %bb.0: ; %entry
12354; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12355; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
12356; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12357; GFX6-NEXT:    s_mov_b32 s11, s5
12358; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12359; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12360; GFX6-NEXT:    s_mov_b32 s10, -1
12361; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12362; GFX6-NEXT:    s_mov_b32 s5, s11
12363; GFX6-NEXT:    s_mov_b32 s6, s10
12364; GFX6-NEXT:    s_mov_b32 s7, s9
12365; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12366; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
12367; GFX6-NEXT:    s_waitcnt vmcnt(0)
12368; GFX6-NEXT:    buffer_wbinvl1
12369; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
12370; GFX6-NEXT:    s_endpgm
12371;
12372; GFX7-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12373; GFX7:       ; %bb.0: ; %entry
12374; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12375; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
12376; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12377; GFX7-NEXT:    v_mov_b32_e32 v0, s4
12378; GFX7-NEXT:    v_mov_b32_e32 v1, s5
12379; GFX7-NEXT:    v_mov_b32_e32 v2, s6
12380; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
12381; GFX7-NEXT:    s_waitcnt vmcnt(0)
12382; GFX7-NEXT:    buffer_wbinvl1_vol
12383; GFX7-NEXT:    v_mov_b32_e32 v0, s4
12384; GFX7-NEXT:    v_mov_b32_e32 v1, s5
12385; GFX7-NEXT:    flat_store_dword v[0:1], v2
12386; GFX7-NEXT:    s_endpgm
12387;
12388; GFX10-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12389; GFX10-WGP:       ; %bb.0: ; %entry
12390; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12391; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12392; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
12393; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12394; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12395; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12396; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12397; GFX10-WGP-NEXT:    buffer_gl1_inv
12398; GFX10-WGP-NEXT:    buffer_gl0_inv
12399; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
12400; GFX10-WGP-NEXT:    s_endpgm
12401;
12402; GFX10-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12403; GFX10-CU:       ; %bb.0: ; %entry
12404; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12405; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12406; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
12407; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12408; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12409; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12410; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12411; GFX10-CU-NEXT:    buffer_gl1_inv
12412; GFX10-CU-NEXT:    buffer_gl0_inv
12413; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
12414; GFX10-CU-NEXT:    s_endpgm
12415;
12416; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12417; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12418; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12419; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
12420; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12421; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12422; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12423; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12424; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12425; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12426; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12427; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12428; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12429; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12430; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
12431; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12432; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
12433; SKIP-CACHE-INV-NEXT:    s_endpgm
12434;
12435; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12436; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12437; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12438; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12439; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12440; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12441; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12442; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12443; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12444; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
12445; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
12446; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12447; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12448;
12449; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12450; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12451; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12452; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12453; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12454; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12455; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12456; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12457; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12458; GFX90A-TGSPLIT-NEXT:    buffer_invl2
12459; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
12460; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12461; GFX90A-TGSPLIT-NEXT:    s_endpgm
12462;
12463; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12464; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12465; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12466; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12467; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12468; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12469; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12470; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
12471; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12472; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
12473; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12474; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12475;
12476; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12477; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12478; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12479; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12480; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12481; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12482; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12483; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
12484; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12485; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
12486; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12487; GFX940-TGSPLIT-NEXT:    s_endpgm
12488;
12489; GFX11-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12490; GFX11-WGP:       ; %bb.0: ; %entry
12491; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12492; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12493; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12494; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12495; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12496; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12497; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12498; GFX11-WGP-NEXT:    buffer_gl1_inv
12499; GFX11-WGP-NEXT:    buffer_gl0_inv
12500; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12501; GFX11-WGP-NEXT:    s_endpgm
12502;
12503; GFX11-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12504; GFX11-CU:       ; %bb.0: ; %entry
12505; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12506; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12507; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12508; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12509; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12510; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12511; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12512; GFX11-CU-NEXT:    buffer_gl1_inv
12513; GFX11-CU-NEXT:    buffer_gl0_inv
12514; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12515; GFX11-CU-NEXT:    s_endpgm
12516;
12517; GFX12-WGP-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12518; GFX12-WGP:       ; %bb.0: ; %entry
12519; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12520; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12521; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12522; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12523; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12524; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
12525; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12526; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
12527; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12528; GFX12-WGP-NEXT:    s_endpgm
12529;
12530; GFX12-CU-LABEL: global_system_one_as_acquire_ret_atomicrmw:
12531; GFX12-CU:       ; %bb.0: ; %entry
12532; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12533; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12534; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12535; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12536; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12537; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
12538; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
12539; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
12540; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12541; GFX12-CU-NEXT:    s_endpgm
12542    ptr addrspace(1) %out, i32 %in) {
12543entry:
12544  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acquire
  ; %val is used by the store below; the checks above expect a swap that
  ; writes a destination VGPR and carries glc (sc0 sc1 for GFX940,
  ; th:TH_ATOMIC_RETURN for GFX12).
12545  store i32 %val, ptr addrspace(1) %out, align 4
12546  ret void
12547}
12548
; Checks the code emitted for a volatile `atomicrmw xchg` on a global
; (addrspace(1)) pointer with syncscope("one-as") and acq_rel ordering, where
; the value produced by the atomic is stored back to %out.
; In the expected output every prefix has wait instruction(s) immediately
; ahead of the swap (s_waitcnt vmcnt(0), plus s_waitcnt_vscnt on GFX10/GFX11,
; or the s_wait_* group on GFX12); GFX90A/GFX940 put buffer_wbl2 and GFX12
; puts global_wb scope:SCOPE_SYS in front of those waits. After the swap each
; prefix waits again and, except for SKIP-CACHE-INV, expects invalidate
; instruction(s) before the trailing store.
; The CHECK lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them rather than editing by hand.
12549define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw(
12550; GFX6-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12551; GFX6:       ; %bb.0: ; %entry
12552; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12553; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
12554; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12555; GFX6-NEXT:    s_mov_b32 s11, s5
12556; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12557; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12558; GFX6-NEXT:    s_mov_b32 s10, -1
12559; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12560; GFX6-NEXT:    s_mov_b32 s5, s11
12561; GFX6-NEXT:    s_mov_b32 s6, s10
12562; GFX6-NEXT:    s_mov_b32 s7, s9
12563; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12564; GFX6-NEXT:    s_waitcnt vmcnt(0)
12565; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
12566; GFX6-NEXT:    s_waitcnt vmcnt(0)
12567; GFX6-NEXT:    buffer_wbinvl1
12568; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
12569; GFX6-NEXT:    s_endpgm
12570;
12571; GFX7-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12572; GFX7:       ; %bb.0: ; %entry
12573; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12574; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
12575; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12576; GFX7-NEXT:    v_mov_b32_e32 v0, s4
12577; GFX7-NEXT:    v_mov_b32_e32 v1, s5
12578; GFX7-NEXT:    v_mov_b32_e32 v2, s6
12579; GFX7-NEXT:    s_waitcnt vmcnt(0)
12580; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
12581; GFX7-NEXT:    s_waitcnt vmcnt(0)
12582; GFX7-NEXT:    buffer_wbinvl1_vol
12583; GFX7-NEXT:    v_mov_b32_e32 v0, s4
12584; GFX7-NEXT:    v_mov_b32_e32 v1, s5
12585; GFX7-NEXT:    flat_store_dword v[0:1], v2
12586; GFX7-NEXT:    s_endpgm
12587;
12588; GFX10-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12589; GFX10-WGP:       ; %bb.0: ; %entry
12590; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12591; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12592; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
12593; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12594; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12595; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12596; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12597; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12598; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12599; GFX10-WGP-NEXT:    buffer_gl1_inv
12600; GFX10-WGP-NEXT:    buffer_gl0_inv
12601; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
12602; GFX10-WGP-NEXT:    s_endpgm
12603;
12604; GFX10-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12605; GFX10-CU:       ; %bb.0: ; %entry
12606; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12607; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12608; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
12609; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12610; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12611; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12612; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12613; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12614; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12615; GFX10-CU-NEXT:    buffer_gl1_inv
12616; GFX10-CU-NEXT:    buffer_gl0_inv
12617; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
12618; GFX10-CU-NEXT:    s_endpgm
12619;
12620; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12621; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12622; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12623; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
12624; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12625; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12626; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12627; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12628; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12629; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12630; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12631; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12632; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12633; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12634; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12635; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
12636; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12637; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
12638; SKIP-CACHE-INV-NEXT:    s_endpgm
12639;
12640; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12641; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12642; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12643; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12644; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12645; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12646; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12647; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
12648; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12649; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12650; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12651; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
12652; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
12653; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12654; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12655;
12656; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12657; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12658; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12659; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12660; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12661; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12662; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12663; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
12664; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12665; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12666; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12667; GFX90A-TGSPLIT-NEXT:    buffer_invl2
12668; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
12669; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12670; GFX90A-TGSPLIT-NEXT:    s_endpgm
12671;
12672; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12673; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12674; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12675; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12676; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12677; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12678; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12679; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
12680; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12681; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
12682; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12683; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
12684; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12685; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12686;
12687; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12688; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12689; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12690; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12691; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12692; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12693; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12694; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
12695; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12696; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
12697; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12698; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
12699; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12700; GFX940-TGSPLIT-NEXT:    s_endpgm
12701;
12702; GFX11-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12703; GFX11-WGP:       ; %bb.0: ; %entry
12704; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12705; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12706; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12707; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12708; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12709; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12710; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12711; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12712; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12713; GFX11-WGP-NEXT:    buffer_gl1_inv
12714; GFX11-WGP-NEXT:    buffer_gl0_inv
12715; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12716; GFX11-WGP-NEXT:    s_endpgm
12717;
12718; GFX11-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12719; GFX11-CU:       ; %bb.0: ; %entry
12720; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12721; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12722; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12723; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12724; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12725; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12726; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12727; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12728; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12729; GFX11-CU-NEXT:    buffer_gl1_inv
12730; GFX11-CU-NEXT:    buffer_gl0_inv
12731; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12732; GFX11-CU-NEXT:    s_endpgm
12733;
12734; GFX12-WGP-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12735; GFX12-WGP:       ; %bb.0: ; %entry
12736; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12737; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12738; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12739; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12740; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12741; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
12742; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
12743; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
12744; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12745; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12746; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
12747; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
12748; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
12749; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12750; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
12751; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12752; GFX12-WGP-NEXT:    s_endpgm
12753;
12754; GFX12-CU-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
12755; GFX12-CU:       ; %bb.0: ; %entry
12756; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12757; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12758; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12759; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12760; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12761; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
12762; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
12763; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
12764; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
12765; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12766; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
12767; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
12768; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
12769; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
12770; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
12771; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12772; GFX12-CU-NEXT:    s_endpgm
12773    ptr addrspace(1) %out, i32 %in) {
12774entry:
12775  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") acq_rel
  ; %val is used by the store below; the checks above expect a swap that
  ; writes a destination VGPR and carries glc (sc0 sc1 for GFX940,
  ; th:TH_ATOMIC_RETURN for GFX12).
12776  store i32 %val, ptr addrspace(1) %out, align 4
12777  ret void
12778}
12779
12780define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw(
12781; GFX6-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12782; GFX6:       ; %bb.0: ; %entry
12783; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12784; GFX6-NEXT:    s_load_dword s8, s[8:9], 0x2
12785; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
12786; GFX6-NEXT:    s_mov_b32 s11, s5
12787; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
12788; GFX6-NEXT:    s_mov_b32 s9, 0x100f000
12789; GFX6-NEXT:    s_mov_b32 s10, -1
12790; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
12791; GFX6-NEXT:    s_mov_b32 s5, s11
12792; GFX6-NEXT:    s_mov_b32 s6, s10
12793; GFX6-NEXT:    s_mov_b32 s7, s9
12794; GFX6-NEXT:    v_mov_b32_e32 v0, s8
12795; GFX6-NEXT:    s_waitcnt vmcnt(0)
12796; GFX6-NEXT:    buffer_atomic_swap v0, off, s[4:7], 0 glc
12797; GFX6-NEXT:    s_waitcnt vmcnt(0)
12798; GFX6-NEXT:    buffer_wbinvl1
12799; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
12800; GFX6-NEXT:    s_endpgm
12801;
12802; GFX7-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12803; GFX7:       ; %bb.0: ; %entry
12804; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12805; GFX7-NEXT:    s_load_dword s6, s[8:9], 0x2
12806; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
12807; GFX7-NEXT:    v_mov_b32_e32 v0, s4
12808; GFX7-NEXT:    v_mov_b32_e32 v1, s5
12809; GFX7-NEXT:    v_mov_b32_e32 v2, s6
12810; GFX7-NEXT:    s_waitcnt vmcnt(0)
12811; GFX7-NEXT:    flat_atomic_swap v2, v[0:1], v2 glc
12812; GFX7-NEXT:    s_waitcnt vmcnt(0)
12813; GFX7-NEXT:    buffer_wbinvl1_vol
12814; GFX7-NEXT:    v_mov_b32_e32 v0, s4
12815; GFX7-NEXT:    v_mov_b32_e32 v1, s5
12816; GFX7-NEXT:    flat_store_dword v[0:1], v2
12817; GFX7-NEXT:    s_endpgm
12818;
12819; GFX10-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12820; GFX10-WGP:       ; %bb.0: ; %entry
12821; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
12822; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12823; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0x8
12824; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12825; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s6
12826; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12827; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12828; GFX10-WGP-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12829; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
12830; GFX10-WGP-NEXT:    buffer_gl1_inv
12831; GFX10-WGP-NEXT:    buffer_gl0_inv
12832; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
12833; GFX10-WGP-NEXT:    s_endpgm
12834;
12835; GFX10-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12836; GFX10-CU:       ; %bb.0: ; %entry
12837; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
12838; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12839; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0x8
12840; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
12841; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s6
12842; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12843; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12844; GFX10-CU-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12845; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
12846; GFX10-CU-NEXT:    buffer_gl1_inv
12847; GFX10-CU-NEXT:    buffer_gl0_inv
12848; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
12849; GFX10-CU-NEXT:    s_endpgm
12850;
12851; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12852; SKIP-CACHE-INV:       ; %bb.0: ; %entry
12853; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12854; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[4:5], 0x2
12855; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
12856; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, s1
12857; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
12858; SKIP-CACHE-INV-NEXT:    s_mov_b32 s5, 0xf000
12859; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
12860; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
12861; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s7
12862; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s6
12863; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s5
12864; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s4
12865; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12866; SKIP-CACHE-INV-NEXT:    buffer_atomic_swap v0, off, s[0:3], 0 glc
12867; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
12868; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
12869; SKIP-CACHE-INV-NEXT:    s_endpgm
12870;
12871; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12872; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
12873; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12874; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12875; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12876; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12877; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12878; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
12879; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12880; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12881; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12882; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
12883; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
12884; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12885; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
12886;
12887; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12888; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
12889; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12890; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
12891; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0x8
12892; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12893; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
12894; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
12895; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12896; GFX90A-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[4:5] glc
12897; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12898; GFX90A-TGSPLIT-NEXT:    buffer_invl2
12899; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
12900; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
12901; GFX90A-TGSPLIT-NEXT:    s_endpgm
12902;
12903; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12904; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
12905; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12906; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12907; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12908; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12909; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12910; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
12911; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12912; GFX940-NOTTGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
12913; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12914; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
12915; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12916; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
12917;
12918; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12919; GFX940-TGSPLIT:       ; %bb.0: ; %entry
12920; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
12921; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
12922; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0x8
12923; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
12924; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
12925; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
12926; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12927; GFX940-TGSPLIT-NEXT:    global_atomic_swap v1, v0, v1, s[0:1] sc0 sc1
12928; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
12929; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
12930; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
12931; GFX940-TGSPLIT-NEXT:    s_endpgm
12932;
12933; GFX11-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12934; GFX11-WGP:       ; %bb.0: ; %entry
12935; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
12936; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12937; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12938; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
12939; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s2
12940; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12941; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
12942; GFX11-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12943; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
12944; GFX11-WGP-NEXT:    buffer_gl1_inv
12945; GFX11-WGP-NEXT:    buffer_gl0_inv
12946; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12947; GFX11-WGP-NEXT:    s_endpgm
12948;
12949; GFX11-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12950; GFX11-CU:       ; %bb.0: ; %entry
12951; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
12952; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12953; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12954; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
12955; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s2
12956; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12957; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
12958; GFX11-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] glc
12959; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
12960; GFX11-CU-NEXT:    buffer_gl1_inv
12961; GFX11-CU-NEXT:    buffer_gl0_inv
12962; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
12963; GFX11-CU-NEXT:    s_endpgm
12964;
12965; GFX12-WGP-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12966; GFX12-WGP:       ; %bb.0: ; %entry
12967; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
12968; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12969; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0x8
12970; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
12971; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s2
12972; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
12973; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
12974; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
12975; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12976; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
12977; GFX12-WGP-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
12978; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
12979; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
12980; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
12981; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
12982; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
12983; GFX12-WGP-NEXT:    s_endpgm
12984;
12985; GFX12-CU-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
12986; GFX12-CU:       ; %bb.0: ; %entry
12987; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
12988; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
12989; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0x8
12990; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
12991; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s2
12992; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
12993; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
12994; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
12995; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
12996; GFX12-CU-NEXT:    s_wait_storecnt 0x0
12997; GFX12-CU-NEXT:    global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS
12998; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
12999; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
13000; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13001; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
13002; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
13003; GFX12-CU-NEXT:    s_endpgm
13004    ptr addrspace(1) %out, i32 %in) {
13005entry:
13006  %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in syncscope("one-as") seq_cst
13007  store i32 %val, ptr addrspace(1) %out, align 4
13008  ret void
13009}
13010
; Non-returning `cmpxchg volatile` on global memory (addrspace(1)) with
; syncscope("one-as") and monotonic success/failure orderings.
; In the expected output below every target emits only the cmpswap itself
; (buffer_, flat_ or global_ form): it is not bracketed by any
; vmcnt/vscnt/storecnt wait or cache invalidate/write-back. The gfx940
; variants tag the atomic with sc1 and the gfx1200 variants with
; scope:SCOPE_SYS.
13011define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg(
13012; GFX6-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13013; GFX6:       ; %bb.0: ; %entry
13014; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
13015; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
13016; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
13017; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
13018; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13019; GFX6-NEXT:    s_mov_b32 s12, s5
13020; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13021; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
13022; GFX6-NEXT:    s_mov_b32 s11, -1
13023; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13024; GFX6-NEXT:    s_mov_b32 s5, s12
13025; GFX6-NEXT:    s_mov_b32 s6, s11
13026; GFX6-NEXT:    s_mov_b32 s7, s10
13027; GFX6-NEXT:    v_mov_b32_e32 v0, s9
13028; GFX6-NEXT:    v_mov_b32_e32 v2, s8
13029; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
13030; GFX6-NEXT:    v_mov_b32_e32 v1, v2
13031; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13032; GFX6-NEXT:    s_endpgm
13033;
13034; GFX7-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13035; GFX7:       ; %bb.0: ; %entry
13036; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
13037; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
13038; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
13039; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
13040; GFX7-NEXT:    s_mov_b64 s[10:11], 16
13041; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13042; GFX7-NEXT:    s_mov_b32 s4, s8
13043; GFX7-NEXT:    s_mov_b32 s5, s9
13044; GFX7-NEXT:    s_mov_b32 s9, s10
13045; GFX7-NEXT:    s_mov_b32 s8, s11
13046; GFX7-NEXT:    s_add_u32 s4, s4, s9
13047; GFX7-NEXT:    s_addc_u32 s8, s5, s8
13048; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
13049; GFX7-NEXT:    s_mov_b32 s5, s8
13050; GFX7-NEXT:    v_mov_b32_e32 v2, s7
13051; GFX7-NEXT:    v_mov_b32_e32 v0, s6
13052; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13053; GFX7-NEXT:    v_mov_b32_e32 v3, v0
13054; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13055; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13056; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
13057; GFX7-NEXT:    s_endpgm
13058;
13059; GFX10-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13060; GFX10-WGP:       ; %bb.0: ; %entry
13061; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13062; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13063; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
13064; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
13065; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13066; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
13067; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
13068; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13069; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
13070; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
13071; GFX10-WGP-NEXT:    s_endpgm
13072;
13073; GFX10-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13074; GFX10-CU:       ; %bb.0: ; %entry
13075; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13076; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13077; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
13078; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
13079; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
13080; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
13081; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
13082; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13083; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
13084; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
13085; GFX10-CU-NEXT:    s_endpgm
13086;
13087; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13088; SKIP-CACHE-INV:       ; %bb.0: ; %entry
13089; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
13090; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
13091; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
13092; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
13093; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
13094; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
13095; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
13096; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
13097; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
13098; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
13099; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
13100; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
13101; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
13102; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
13103; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
13104; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
13105; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
13106; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
13107; SKIP-CACHE-INV-NEXT:    s_endpgm
13108;
13109; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13110; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13111; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13112; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13113; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
13114; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
13115; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13116; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
13117; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13118; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13119; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13120; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
13121; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13122;
13123; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13124; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13125; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13126; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13127; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
13128; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
13129; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13130; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
13131; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13132; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13133; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13134; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
13135; GFX90A-TGSPLIT-NEXT:    s_endpgm
13136;
13137; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13138; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13139; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13140; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13141; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
13142; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
13143; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13144; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
13145; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13146; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13147; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13148; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
13149; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13150;
13151; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13152; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13153; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13154; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13155; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
13156; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
13157; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13158; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
13159; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13160; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13161; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13162; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
13163; GFX940-TGSPLIT-NEXT:    s_endpgm
13164;
13165; GFX11-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13166; GFX11-WGP:       ; %bb.0: ; %entry
13167; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13168; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13169; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
13170; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
13171; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13172; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
13173; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
13174; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13175; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
13176; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13177; GFX11-WGP-NEXT:    s_endpgm
13178;
13179; GFX11-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13180; GFX11-CU:       ; %bb.0: ; %entry
13181; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13182; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13183; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
13184; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
13185; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13186; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
13187; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
13188; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13189; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
13190; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13191; GFX11-CU-NEXT:    s_endpgm
13192;
13193; GFX12-WGP-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13194; GFX12-WGP:       ; %bb.0: ; %entry
13195; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13196; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13197; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
13198; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
13199; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13200; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
13201; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
13202; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13203; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
13204; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
13205; GFX12-WGP-NEXT:    s_endpgm
13206;
13207; GFX12-CU-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
13208; GFX12-CU:       ; %bb.0: ; %entry
13209; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13210; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13211; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
13212; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
13213; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13214; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
13215; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
13216; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13217; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
13218; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
13219; GFX12-CU-NEXT:    s_endpgm
13220    ptr addrspace(1) %out, i32 %in, i32 %old) {
13221entry:
  ; Address of the i32 four elements past %out; this is the offset of 16 seen
  ; in the expected cmpswap instructions above.
13222  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and store %in on a match; the result %val is unused.
13223  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
13224  ret void
13225}
13226
; Non-returning `cmpxchg volatile` on global memory (addrspace(1)) with
; syncscope("one-as"), acquire success ordering and monotonic failure
; ordering. In the expected output below the cmpswap is followed by a wait
; instruction and then cache invalidate instruction(s):
;   gfx600            s_waitcnt vmcnt(0), buffer_wbinvl1
;   gfx700            s_waitcnt vmcnt(0), buffer_wbinvl1_vol
;   gfx1010, gfx1100  s_waitcnt_vscnt null, 0x0, buffer_gl1_inv, buffer_gl0_inv
;   gfx90a            s_waitcnt vmcnt(0), buffer_invl2, buffer_wbinvl1_vol
;   gfx940            s_waitcnt vmcnt(0), buffer_inv sc0 sc1
;   gfx1200           s_wait_storecnt 0x0, global_inv scope:SCOPE_SYS
; The gfx700 run that passes -amdgcn-skip-cache-invalidations keeps the
; s_waitcnt vmcnt(0) but has no invalidate after it.
13227define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg(
13228; GFX6-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13229; GFX6:       ; %bb.0: ; %entry
13230; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
13231; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
13232; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
13233; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
13234; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13235; GFX6-NEXT:    s_mov_b32 s12, s5
13236; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13237; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
13238; GFX6-NEXT:    s_mov_b32 s11, -1
13239; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13240; GFX6-NEXT:    s_mov_b32 s5, s12
13241; GFX6-NEXT:    s_mov_b32 s6, s11
13242; GFX6-NEXT:    s_mov_b32 s7, s10
13243; GFX6-NEXT:    v_mov_b32_e32 v0, s9
13244; GFX6-NEXT:    v_mov_b32_e32 v2, s8
13245; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
13246; GFX6-NEXT:    v_mov_b32_e32 v1, v2
13247; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13248; GFX6-NEXT:    s_waitcnt vmcnt(0)
13249; GFX6-NEXT:    buffer_wbinvl1
13250; GFX6-NEXT:    s_endpgm
13251;
13252; GFX7-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13253; GFX7:       ; %bb.0: ; %entry
13254; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
13255; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
13256; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
13257; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
13258; GFX7-NEXT:    s_mov_b64 s[10:11], 16
13259; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13260; GFX7-NEXT:    s_mov_b32 s4, s8
13261; GFX7-NEXT:    s_mov_b32 s5, s9
13262; GFX7-NEXT:    s_mov_b32 s9, s10
13263; GFX7-NEXT:    s_mov_b32 s8, s11
13264; GFX7-NEXT:    s_add_u32 s4, s4, s9
13265; GFX7-NEXT:    s_addc_u32 s8, s5, s8
13266; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
13267; GFX7-NEXT:    s_mov_b32 s5, s8
13268; GFX7-NEXT:    v_mov_b32_e32 v2, s7
13269; GFX7-NEXT:    v_mov_b32_e32 v0, s6
13270; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13271; GFX7-NEXT:    v_mov_b32_e32 v3, v0
13272; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13273; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13274; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
13275; GFX7-NEXT:    s_waitcnt vmcnt(0)
13276; GFX7-NEXT:    buffer_wbinvl1_vol
13277; GFX7-NEXT:    s_endpgm
13278;
13279; GFX10-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13280; GFX10-WGP:       ; %bb.0: ; %entry
13281; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13282; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13283; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
13284; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
13285; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13286; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
13287; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
13288; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13289; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
13290; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
13291; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13292; GFX10-WGP-NEXT:    buffer_gl1_inv
13293; GFX10-WGP-NEXT:    buffer_gl0_inv
13294; GFX10-WGP-NEXT:    s_endpgm
13295;
13296; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13297; GFX10-CU:       ; %bb.0: ; %entry
13298; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13299; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13300; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
13301; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
13302; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
13303; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
13304; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
13305; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13306; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
13307; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
13308; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13309; GFX10-CU-NEXT:    buffer_gl1_inv
13310; GFX10-CU-NEXT:    buffer_gl0_inv
13311; GFX10-CU-NEXT:    s_endpgm
13312;
13313; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13314; SKIP-CACHE-INV:       ; %bb.0: ; %entry
13315; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
13316; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
13317; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
13318; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
13319; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
13320; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
13321; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
13322; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
13323; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
13324; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
13325; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
13326; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
13327; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
13328; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
13329; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
13330; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
13331; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
13332; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
13333; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13334; SKIP-CACHE-INV-NEXT:    s_endpgm
13335;
13336; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13337; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13338; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13339; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13340; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
13341; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
13342; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13343; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
13344; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13345; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13346; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13347; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
13348; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13349; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
13350; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
13351; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13352;
13353; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13354; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13355; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13356; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13357; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
13358; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
13359; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13360; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
13361; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13362; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13363; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13364; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
13365; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13366; GFX90A-TGSPLIT-NEXT:    buffer_invl2
13367; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
13368; GFX90A-TGSPLIT-NEXT:    s_endpgm
13369;
13370; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13371; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13372; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13373; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13374; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
13375; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
13376; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13377; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
13378; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13379; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13380; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13381; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
13382; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13383; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
13384; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13385;
13386; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13387; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13388; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13389; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13390; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
13391; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
13392; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13393; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
13394; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13395; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13396; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13397; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
13398; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13399; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
13400; GFX940-TGSPLIT-NEXT:    s_endpgm
13401;
13402; GFX11-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13403; GFX11-WGP:       ; %bb.0: ; %entry
13404; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13405; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13406; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
13407; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
13408; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13409; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
13410; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
13411; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13412; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
13413; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13414; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13415; GFX11-WGP-NEXT:    buffer_gl1_inv
13416; GFX11-WGP-NEXT:    buffer_gl0_inv
13417; GFX11-WGP-NEXT:    s_endpgm
13418;
13419; GFX11-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13420; GFX11-CU:       ; %bb.0: ; %entry
13421; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13422; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13423; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
13424; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
13425; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13426; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
13427; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
13428; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13429; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
13430; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13431; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13432; GFX11-CU-NEXT:    buffer_gl1_inv
13433; GFX11-CU-NEXT:    buffer_gl0_inv
13434; GFX11-CU-NEXT:    s_endpgm
13435;
13436; GFX12-WGP-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13437; GFX12-WGP:       ; %bb.0: ; %entry
13438; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13439; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13440; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
13441; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
13442; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13443; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
13444; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
13445; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13446; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
13447; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
13448; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13449; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
13450; GFX12-WGP-NEXT:    s_endpgm
13451;
13452; GFX12-CU-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
13453; GFX12-CU:       ; %bb.0: ; %entry
13454; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13455; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13456; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
13457; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
13458; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13459; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
13460; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
13461; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13462; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
13463; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
13464; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13465; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
13466; GFX12-CU-NEXT:    s_endpgm
13467    ptr addrspace(1) %out, i32 %in, i32 %old) {
13468entry:
  ; Address of the i32 four elements past %out; this is the offset of 16 seen
  ; in the expected cmpswap instructions above.
13469  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and store %in on a match; the result %val is unused.
13470  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
13471  ret void
13472}
13473
; Codegen test for a volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("one-as"), success ordering release and failure ordering monotonic.
; The cmpxchg result (%val) is not used; the kernel returns void.
;
; What the expected output below shows for every run configuration:
;   - one or more wait instructions are emitted immediately before the atomic
;     compare-and-swap (preceded by buffer_wbl2 on the gfx90a/gfx940 runs and
;     by global_wb scope:SCOPE_SYS on the gfx1200 runs);
;   - nothing is emitted between the atomic and s_endpgm, i.e. no wait and no
;     cache invalidate follow the operation.
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
13474define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg(
13475; GFX6-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13476; GFX6:       ; %bb.0: ; %entry
13477; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
13478; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
13479; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
13480; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
13481; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13482; GFX6-NEXT:    s_mov_b32 s12, s5
13483; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13484; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
13485; GFX6-NEXT:    s_mov_b32 s11, -1
13486; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13487; GFX6-NEXT:    s_mov_b32 s5, s12
13488; GFX6-NEXT:    s_mov_b32 s6, s11
13489; GFX6-NEXT:    s_mov_b32 s7, s10
13490; GFX6-NEXT:    v_mov_b32_e32 v0, s9
13491; GFX6-NEXT:    v_mov_b32_e32 v2, s8
13492; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
13493; GFX6-NEXT:    v_mov_b32_e32 v1, v2
13494; GFX6-NEXT:    s_waitcnt vmcnt(0)
13495; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13496; GFX6-NEXT:    s_endpgm
13497;
13498; GFX7-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13499; GFX7:       ; %bb.0: ; %entry
13500; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
13501; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
13502; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
13503; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
13504; GFX7-NEXT:    s_mov_b64 s[10:11], 16
13505; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13506; GFX7-NEXT:    s_mov_b32 s4, s8
13507; GFX7-NEXT:    s_mov_b32 s5, s9
13508; GFX7-NEXT:    s_mov_b32 s9, s10
13509; GFX7-NEXT:    s_mov_b32 s8, s11
13510; GFX7-NEXT:    s_add_u32 s4, s4, s9
13511; GFX7-NEXT:    s_addc_u32 s8, s5, s8
13512; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
13513; GFX7-NEXT:    s_mov_b32 s5, s8
13514; GFX7-NEXT:    v_mov_b32_e32 v2, s7
13515; GFX7-NEXT:    v_mov_b32_e32 v0, s6
13516; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13517; GFX7-NEXT:    v_mov_b32_e32 v3, v0
13518; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13519; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13520; GFX7-NEXT:    s_waitcnt vmcnt(0)
13521; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
13522; GFX7-NEXT:    s_endpgm
13523;
13524; GFX10-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13525; GFX10-WGP:       ; %bb.0: ; %entry
13526; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13527; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13528; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
13529; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
13530; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13531; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
13532; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
13533; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13534; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
13535; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13536; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13537; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
13538; GFX10-WGP-NEXT:    s_endpgm
13539;
13540; GFX10-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13541; GFX10-CU:       ; %bb.0: ; %entry
13542; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13543; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13544; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
13545; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
13546; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
13547; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
13548; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
13549; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13550; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
13551; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
13552; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13553; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
13554; GFX10-CU-NEXT:    s_endpgm
13555;
13556; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13557; SKIP-CACHE-INV:       ; %bb.0: ; %entry
13558; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
13559; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
13560; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
13561; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
13562; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
13563; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
13564; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
13565; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
13566; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
13567; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
13568; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
13569; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
13570; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
13571; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
13572; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
13573; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
13574; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
13575; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13576; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
13577; SKIP-CACHE-INV-NEXT:    s_endpgm
13578;
13579; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13580; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13581; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13582; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13583; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
13584; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
13585; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13586; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
13587; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13588; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13589; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13590; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
13591; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13592; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
13593; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13594;
13595; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13596; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13597; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13598; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13599; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
13600; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
13601; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13602; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
13603; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13604; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13605; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13606; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
13607; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13608; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
13609; GFX90A-TGSPLIT-NEXT:    s_endpgm
13610;
13611; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13612; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13613; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13614; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13615; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
13616; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
13617; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13618; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
13619; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13620; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13621; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13622; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
13623; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13624; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
13625; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13626;
13627; GFX940-TGSPLIT-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13628; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13629; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13630; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13631; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
13632; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
13633; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13634; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
13635; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13636; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13637; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13638; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
13639; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13640; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
13641; GFX940-TGSPLIT-NEXT:    s_endpgm
13642;
13643; GFX11-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13644; GFX11-WGP:       ; %bb.0: ; %entry
13645; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13646; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13647; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
13648; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
13649; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13650; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
13651; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
13652; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13653; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
13654; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
13655; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13656; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13657; GFX11-WGP-NEXT:    s_endpgm
13658;
13659; GFX11-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13660; GFX11-CU:       ; %bb.0: ; %entry
13661; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13662; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13663; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
13664; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
13665; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13666; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
13667; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
13668; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13669; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
13670; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
13671; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13672; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13673; GFX11-CU-NEXT:    s_endpgm
13674;
13675; GFX12-WGP-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13676; GFX12-WGP:       ; %bb.0: ; %entry
13677; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13678; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13679; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
13680; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
13681; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13682; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
13683; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
13684; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13685; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
13686; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
13687; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
13688; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
13689; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
13690; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13691; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
13692; GFX12-WGP-NEXT:    s_endpgm
13693;
13694; GFX12-CU-LABEL: global_system_one_as_release_monotonic_cmpxchg:
13695; GFX12-CU:       ; %bb.0: ; %entry
13696; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13697; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13698; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
13699; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
13700; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13701; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
13702; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
13703; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13704; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
13705; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
13706; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
13707; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
13708; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13709; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13710; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
13711; GFX12-CU-NEXT:    s_endpgm
13712    ptr addrspace(1) %out, i32 %in, i32 %old) {
13713entry:
  ; Element index 4 of an i32 array, i.e. byte offset 16 from %out; this is
  ; the 16-byte offset that appears in the expected instructions above.
13714  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
13715  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release monotonic
13716  ret void
13717}
13718
; Codegen test for a volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("one-as"), success ordering acq_rel and failure ordering monotonic.
; The cmpxchg result (%val) is not used; the kernel returns void.
;
; What the expected output below shows:
;   - before the atomic compare-and-swap, one or more wait instructions are
;     emitted (preceded by buffer_wbl2 on the gfx90a/gfx940 runs and by
;     global_wb scope:SCOPE_SYS on the gfx1200 runs);
;   - after the atomic, a further wait is emitted, followed by cache
;     invalidate instruction(s) on every run except the one that passes
;     -amdgcn-skip-cache-invalidations, where only the wait remains.
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
13719define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg(
13720; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13721; GFX6:       ; %bb.0: ; %entry
13722; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
13723; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
13724; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
13725; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
13726; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
13727; GFX6-NEXT:    s_mov_b32 s12, s5
13728; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
13729; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
13730; GFX6-NEXT:    s_mov_b32 s11, -1
13731; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
13732; GFX6-NEXT:    s_mov_b32 s5, s12
13733; GFX6-NEXT:    s_mov_b32 s6, s11
13734; GFX6-NEXT:    s_mov_b32 s7, s10
13735; GFX6-NEXT:    v_mov_b32_e32 v0, s9
13736; GFX6-NEXT:    v_mov_b32_e32 v2, s8
13737; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
13738; GFX6-NEXT:    v_mov_b32_e32 v1, v2
13739; GFX6-NEXT:    s_waitcnt vmcnt(0)
13740; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
13741; GFX6-NEXT:    s_waitcnt vmcnt(0)
13742; GFX6-NEXT:    buffer_wbinvl1
13743; GFX6-NEXT:    s_endpgm
13744;
13745; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13746; GFX7:       ; %bb.0: ; %entry
13747; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
13748; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
13749; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
13750; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
13751; GFX7-NEXT:    s_mov_b64 s[10:11], 16
13752; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
13753; GFX7-NEXT:    s_mov_b32 s4, s8
13754; GFX7-NEXT:    s_mov_b32 s5, s9
13755; GFX7-NEXT:    s_mov_b32 s9, s10
13756; GFX7-NEXT:    s_mov_b32 s8, s11
13757; GFX7-NEXT:    s_add_u32 s4, s4, s9
13758; GFX7-NEXT:    s_addc_u32 s8, s5, s8
13759; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
13760; GFX7-NEXT:    s_mov_b32 s5, s8
13761; GFX7-NEXT:    v_mov_b32_e32 v2, s7
13762; GFX7-NEXT:    v_mov_b32_e32 v0, s6
13763; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13764; GFX7-NEXT:    v_mov_b32_e32 v3, v0
13765; GFX7-NEXT:    v_mov_b32_e32 v0, s4
13766; GFX7-NEXT:    v_mov_b32_e32 v1, s5
13767; GFX7-NEXT:    s_waitcnt vmcnt(0)
13768; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
13769; GFX7-NEXT:    s_waitcnt vmcnt(0)
13770; GFX7-NEXT:    buffer_wbinvl1_vol
13771; GFX7-NEXT:    s_endpgm
13772;
13773; GFX10-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13774; GFX10-WGP:       ; %bb.0: ; %entry
13775; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
13776; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13777; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
13778; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
13779; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13780; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
13781; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
13782; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13783; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
13784; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
13785; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13786; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
13787; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13788; GFX10-WGP-NEXT:    buffer_gl1_inv
13789; GFX10-WGP-NEXT:    buffer_gl0_inv
13790; GFX10-WGP-NEXT:    s_endpgm
13791;
13792; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13793; GFX10-CU:       ; %bb.0: ; %entry
13794; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
13795; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13796; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
13797; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
13798; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
13799; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
13800; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
13801; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13802; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
13803; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
13804; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13805; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
13806; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13807; GFX10-CU-NEXT:    buffer_gl1_inv
13808; GFX10-CU-NEXT:    buffer_gl0_inv
13809; GFX10-CU-NEXT:    s_endpgm
13810;
13811; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13812; SKIP-CACHE-INV:       ; %bb.0: ; %entry
13813; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
13814; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
13815; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
13816; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
13817; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
13818; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
13819; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
13820; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
13821; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
13822; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
13823; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
13824; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
13825; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
13826; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
13827; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
13828; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
13829; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
13830; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13831; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
13832; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
13833; SKIP-CACHE-INV-NEXT:    s_endpgm
13834;
13835; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13836; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
13837; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13838; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13839; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
13840; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
13841; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13842; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
13843; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13844; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13845; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13846; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
13847; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13848; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
13849; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13850; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
13851; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
13852; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
13853;
13854; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13855; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
13856; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13857; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
13858; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
13859; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
13860; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13861; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
13862; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
13863; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13864; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13865; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
13866; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13867; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
13868; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13869; GFX90A-TGSPLIT-NEXT:    buffer_invl2
13870; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
13871; GFX90A-TGSPLIT-NEXT:    s_endpgm
13872;
13873; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13874; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
13875; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13876; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13877; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
13878; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
13879; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13880; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
13881; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13882; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13883; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13884; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
13885; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13886; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
13887; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13888; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
13889; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
13890;
13891; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13892; GFX940-TGSPLIT:       ; %bb.0: ; %entry
13893; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
13894; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
13895; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
13896; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
13897; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
13898; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
13899; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
13900; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
13901; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
13902; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
13903; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13904; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
13905; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
13906; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
13907; GFX940-TGSPLIT-NEXT:    s_endpgm
13908;
13909; GFX11-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13910; GFX11-WGP:       ; %bb.0: ; %entry
13911; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
13912; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13913; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
13914; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
13915; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
13916; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
13917; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
13918; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13919; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
13920; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
13921; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13922; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13923; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
13924; GFX11-WGP-NEXT:    buffer_gl1_inv
13925; GFX11-WGP-NEXT:    buffer_gl0_inv
13926; GFX11-WGP-NEXT:    s_endpgm
13927;
13928; GFX11-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13929; GFX11-CU:       ; %bb.0: ; %entry
13930; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
13931; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13932; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
13933; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
13934; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
13935; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
13936; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
13937; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13938; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
13939; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
13940; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13941; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
13942; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
13943; GFX11-CU-NEXT:    buffer_gl1_inv
13944; GFX11-CU-NEXT:    buffer_gl0_inv
13945; GFX11-CU-NEXT:    s_endpgm
13946;
13947; GFX12-WGP-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13948; GFX12-WGP:       ; %bb.0: ; %entry
13949; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
13950; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13951; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
13952; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
13953; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
13954; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
13955; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
13956; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13957; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
13958; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
13959; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
13960; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
13961; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
13962; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13963; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
13964; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
13965; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
13966; GFX12-WGP-NEXT:    s_endpgm
13967;
13968; GFX12-CU-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
13969; GFX12-CU:       ; %bb.0: ; %entry
13970; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
13971; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
13972; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
13973; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
13974; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
13975; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
13976; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
13977; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
13978; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
13979; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
13980; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
13981; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
13982; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
13983; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13984; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
13985; GFX12-CU-NEXT:    s_wait_storecnt 0x0
13986; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
13987; GFX12-CU-NEXT:    s_endpgm
13988    ptr addrspace(1) %out, i32 %in, i32 %old) {
13989entry:
  ; Element index 4 of an i32 array, i.e. byte offset 16 from %out; this is
  ; the 16-byte offset that appears in the expected instructions above.
13990  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
13991  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
13992  ret void
13993}
13994
13995define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg(
13996; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
13997; GFX6:       ; %bb.0: ; %entry
13998; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
13999; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
14000; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
14001; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
14002; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
14003; GFX6-NEXT:    s_mov_b32 s12, s5
14004; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
14005; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
14006; GFX6-NEXT:    s_mov_b32 s11, -1
14007; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
14008; GFX6-NEXT:    s_mov_b32 s5, s12
14009; GFX6-NEXT:    s_mov_b32 s6, s11
14010; GFX6-NEXT:    s_mov_b32 s7, s10
14011; GFX6-NEXT:    v_mov_b32_e32 v0, s9
14012; GFX6-NEXT:    v_mov_b32_e32 v2, s8
14013; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14014; GFX6-NEXT:    v_mov_b32_e32 v1, v2
14015; GFX6-NEXT:    s_waitcnt vmcnt(0)
14016; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14017; GFX6-NEXT:    s_waitcnt vmcnt(0)
14018; GFX6-NEXT:    buffer_wbinvl1
14019; GFX6-NEXT:    s_endpgm
14020;
14021; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14022; GFX7:       ; %bb.0: ; %entry
14023; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
14024; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
14025; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
14026; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
14027; GFX7-NEXT:    s_mov_b64 s[10:11], 16
14028; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
14029; GFX7-NEXT:    s_mov_b32 s4, s8
14030; GFX7-NEXT:    s_mov_b32 s5, s9
14031; GFX7-NEXT:    s_mov_b32 s9, s10
14032; GFX7-NEXT:    s_mov_b32 s8, s11
14033; GFX7-NEXT:    s_add_u32 s4, s4, s9
14034; GFX7-NEXT:    s_addc_u32 s8, s5, s8
14035; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
14036; GFX7-NEXT:    s_mov_b32 s5, s8
14037; GFX7-NEXT:    v_mov_b32_e32 v2, s7
14038; GFX7-NEXT:    v_mov_b32_e32 v0, s6
14039; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14040; GFX7-NEXT:    v_mov_b32_e32 v3, v0
14041; GFX7-NEXT:    v_mov_b32_e32 v0, s4
14042; GFX7-NEXT:    v_mov_b32_e32 v1, s5
14043; GFX7-NEXT:    s_waitcnt vmcnt(0)
14044; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
14045; GFX7-NEXT:    s_waitcnt vmcnt(0)
14046; GFX7-NEXT:    buffer_wbinvl1_vol
14047; GFX7-NEXT:    s_endpgm
14048;
14049; GFX10-WGP-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14050; GFX10-WGP:       ; %bb.0: ; %entry
14051; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
14052; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14053; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
14054; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
14055; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14056; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
14057; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
14058; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14059; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
14060; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
14061; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14062; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14063; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14064; GFX10-WGP-NEXT:    buffer_gl1_inv
14065; GFX10-WGP-NEXT:    buffer_gl0_inv
14066; GFX10-WGP-NEXT:    s_endpgm
14067;
14068; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14069; GFX10-CU:       ; %bb.0: ; %entry
14070; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
14071; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14072; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
14073; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
14074; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14075; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
14076; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
14077; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14078; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
14079; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
14080; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14081; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14082; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14083; GFX10-CU-NEXT:    buffer_gl1_inv
14084; GFX10-CU-NEXT:    buffer_gl0_inv
14085; GFX10-CU-NEXT:    s_endpgm
14086;
14087; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14088; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14089; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
14090; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
14091; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
14092; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
14093; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14094; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
14095; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14096; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
14097; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
14098; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14099; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
14100; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
14101; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
14102; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
14103; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
14104; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14105; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
14106; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14107; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14108; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14109; SKIP-CACHE-INV-NEXT:    s_endpgm
14110;
14111; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14112; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14113; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14114; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14115; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14116; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14117; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14118; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14119; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14120; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14121; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14122; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
14123; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14124; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14125; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14126; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
14127; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
14128; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14129;
14130; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14131; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
14132; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14133; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14134; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14135; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14136; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14137; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14138; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14139; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14140; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14141; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
14142; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14143; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14144; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14145; GFX90A-TGSPLIT-NEXT:    buffer_invl2
14146; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
14147; GFX90A-TGSPLIT-NEXT:    s_endpgm
14148;
14149; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14150; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
14151; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14152; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14153; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14154; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14155; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14156; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14157; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14158; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14159; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14160; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
14161; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14162; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
14163; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14164; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
14165; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
14166;
14167; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14168; GFX940-TGSPLIT:       ; %bb.0: ; %entry
14169; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14170; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14171; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14172; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14173; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14174; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14175; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14176; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14177; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14178; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
14179; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14180; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
14181; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14182; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
14183; GFX940-TGSPLIT-NEXT:    s_endpgm
14184;
14185; GFX11-WGP-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14186; GFX11-WGP:       ; %bb.0: ; %entry
14187; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
14188; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14189; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14190; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14191; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14192; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
14193; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
14194; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14195; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
14196; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
14197; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14198; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14199; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14200; GFX11-WGP-NEXT:    buffer_gl1_inv
14201; GFX11-WGP-NEXT:    buffer_gl0_inv
14202; GFX11-WGP-NEXT:    s_endpgm
14203;
14204; GFX11-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14205; GFX11-CU:       ; %bb.0: ; %entry
14206; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
14207; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14208; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14209; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14210; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
14211; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
14212; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
14213; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14214; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
14215; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
14216; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14217; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14218; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14219; GFX11-CU-NEXT:    buffer_gl1_inv
14220; GFX11-CU-NEXT:    buffer_gl0_inv
14221; GFX11-CU-NEXT:    s_endpgm
14222;
14223; GFX12-WGP-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14224; GFX12-WGP:       ; %bb.0: ; %entry
14225; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
14226; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14227; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14228; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14229; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
14230; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
14231; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
14232; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14233; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
14234; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
14235; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
14236; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
14237; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
14238; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
14239; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
14240; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
14241; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
14242; GFX12-WGP-NEXT:    s_endpgm
14243;
14244; GFX12-CU-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
14245; GFX12-CU:       ; %bb.0: ; %entry
14246; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
14247; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14248; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14249; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14250; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
14251; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
14252; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
14253; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14254; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
14255; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
14256; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
14257; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
14258; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
14259; GFX12-CU-NEXT:    s_wait_storecnt 0x0
14260; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
14261; GFX12-CU-NEXT:    s_wait_storecnt 0x0
14262; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
14263; GFX12-CU-NEXT:    s_endpgm
14264    ptr addrspace(1) %out, i32 %in, i32 %old) {
14265entry:
14266  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
14267  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
14268  ret void
14269}
14270
; Test: volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("one-as"), success ordering monotonic and failure ordering acquire.
;
; What the expected output below shows for this ordering pair:
;   * No wait or cache write-back is expected in front of the compare-and-swap
;     in any configuration.
;   * After the compare-and-swap every configuration waits for it to complete
;     (s_waitcnt vmcnt(0), s_waitcnt_vscnt null, 0x0, or s_wait_storecnt 0x0)
;     and then invalidates caches (buffer_wbinvl1[_vol], buffer_gl1_inv followed
;     by buffer_gl0_inv, buffer_invl2 + buffer_wbinvl1_vol, buffer_inv sc0 sc1,
;     or global_inv scope:SCOPE_SYS).
;   * The run that passes -amdgcn-skip-cache-invalidations keeps the wait but
;     expects no invalidate instruction before s_endpgm.
;
; NOTE(review): the assertions are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
14271define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg(
14272; GFX6-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14273; GFX6:       ; %bb.0: ; %entry
14274; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
14275; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
14276; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
14277; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
14278; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
14279; GFX6-NEXT:    s_mov_b32 s12, s5
14280; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
14281; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
14282; GFX6-NEXT:    s_mov_b32 s11, -1
14283; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
14284; GFX6-NEXT:    s_mov_b32 s5, s12
14285; GFX6-NEXT:    s_mov_b32 s6, s11
14286; GFX6-NEXT:    s_mov_b32 s7, s10
14287; GFX6-NEXT:    v_mov_b32_e32 v0, s9
14288; GFX6-NEXT:    v_mov_b32_e32 v2, s8
14289; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14290; GFX6-NEXT:    v_mov_b32_e32 v1, v2
14291; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14292; GFX6-NEXT:    s_waitcnt vmcnt(0)
14293; GFX6-NEXT:    buffer_wbinvl1
14294; GFX6-NEXT:    s_endpgm
14295;
14296; GFX7-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14297; GFX7:       ; %bb.0: ; %entry
14298; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
14299; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
14300; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
14301; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
14302; GFX7-NEXT:    s_mov_b64 s[10:11], 16
14303; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
14304; GFX7-NEXT:    s_mov_b32 s4, s8
14305; GFX7-NEXT:    s_mov_b32 s5, s9
14306; GFX7-NEXT:    s_mov_b32 s9, s10
14307; GFX7-NEXT:    s_mov_b32 s8, s11
14308; GFX7-NEXT:    s_add_u32 s4, s4, s9
14309; GFX7-NEXT:    s_addc_u32 s8, s5, s8
14310; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
14311; GFX7-NEXT:    s_mov_b32 s5, s8
14312; GFX7-NEXT:    v_mov_b32_e32 v2, s7
14313; GFX7-NEXT:    v_mov_b32_e32 v0, s6
14314; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14315; GFX7-NEXT:    v_mov_b32_e32 v3, v0
14316; GFX7-NEXT:    v_mov_b32_e32 v0, s4
14317; GFX7-NEXT:    v_mov_b32_e32 v1, s5
14318; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
14319; GFX7-NEXT:    s_waitcnt vmcnt(0)
14320; GFX7-NEXT:    buffer_wbinvl1_vol
14321; GFX7-NEXT:    s_endpgm
14322;
14323; GFX10-WGP-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14324; GFX10-WGP:       ; %bb.0: ; %entry
14325; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
14326; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14327; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
14328; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
14329; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14330; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
14331; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
14332; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14333; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
14334; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14335; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14336; GFX10-WGP-NEXT:    buffer_gl1_inv
14337; GFX10-WGP-NEXT:    buffer_gl0_inv
14338; GFX10-WGP-NEXT:    s_endpgm
14339;
14340; GFX10-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14341; GFX10-CU:       ; %bb.0: ; %entry
14342; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
14343; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14344; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
14345; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
14346; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14347; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
14348; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
14349; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14350; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
14351; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14352; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14353; GFX10-CU-NEXT:    buffer_gl1_inv
14354; GFX10-CU-NEXT:    buffer_gl0_inv
14355; GFX10-CU-NEXT:    s_endpgm
14356;
14357; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14358; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14359; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
14360; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
14361; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
14362; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
14363; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14364; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
14365; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14366; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
14367; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
14368; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14369; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
14370; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
14371; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
14372; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
14373; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
14374; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14375; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
14376; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14377; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14378; SKIP-CACHE-INV-NEXT:    s_endpgm
14379;
14380; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14381; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14382; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14383; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14384; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14385; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14386; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14387; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14388; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14389; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14390; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14391; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14392; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14393; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
14394; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
14395; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14396;
14397; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14398; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
14399; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14400; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14401; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14402; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14403; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14404; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14405; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14406; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14407; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14408; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14409; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14410; GFX90A-TGSPLIT-NEXT:    buffer_invl2
14411; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
14412; GFX90A-TGSPLIT-NEXT:    s_endpgm
14413;
14414; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14415; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
14416; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14417; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14418; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14419; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14420; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14421; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14422; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14423; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14424; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14425; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
14426; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14427; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
14428; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
14429;
14430; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14431; GFX940-TGSPLIT:       ; %bb.0: ; %entry
14432; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14433; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14434; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14435; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14436; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14437; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14438; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14439; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14440; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14441; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
14442; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14443; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
14444; GFX940-TGSPLIT-NEXT:    s_endpgm
14445;
14446; GFX11-WGP-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14447; GFX11-WGP:       ; %bb.0: ; %entry
14448; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
14449; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14450; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14451; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14452; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14453; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
14454; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
14455; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14456; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
14457; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14458; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14459; GFX11-WGP-NEXT:    buffer_gl1_inv
14460; GFX11-WGP-NEXT:    buffer_gl0_inv
14461; GFX11-WGP-NEXT:    s_endpgm
14462;
14463; GFX11-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14464; GFX11-CU:       ; %bb.0: ; %entry
14465; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
14466; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14467; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14468; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14469; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
14470; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
14471; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
14472; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14473; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
14474; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14475; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14476; GFX11-CU-NEXT:    buffer_gl1_inv
14477; GFX11-CU-NEXT:    buffer_gl0_inv
14478; GFX11-CU-NEXT:    s_endpgm
14479;
14480; GFX12-WGP-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14481; GFX12-WGP:       ; %bb.0: ; %entry
14482; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
14483; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14484; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14485; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14486; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
14487; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
14488; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
14489; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14490; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
14491; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
14492; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
14493; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
14494; GFX12-WGP-NEXT:    s_endpgm
14495;
14496; GFX12-CU-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
14497; GFX12-CU:       ; %bb.0: ; %entry
14498; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
14499; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14500; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14501; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14502; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
14503; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
14504; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
14505; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14506; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
14507; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
14508; GFX12-CU-NEXT:    s_wait_storecnt 0x0
14509; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
14510; GFX12-CU-NEXT:    s_endpgm
14511    ptr addrspace(1) %out, i32 %in, i32 %old) {
14512entry:
  ; %gep addresses the i32 at index 4 of %out, i.e. 16 bytes past %out. The
  ; expected output shows this as "offset:16" on the atomic, or as an explicit
  ; 64-bit add of 16 in the sequence that uses flat_atomic_cmpswap.
14513  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and store %in on a match; the result %val is never
  ; used, only the generated synchronization sequence is under test.
14514  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire
14515  ret void
14516}
14517
; Test: volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("one-as"), success ordering acquire and failure ordering acquire.
;
; What the expected output below shows for this ordering pair:
;   * No wait or cache write-back is expected in front of the compare-and-swap
;     in any configuration.
;   * After the compare-and-swap every configuration waits for it to complete
;     (s_waitcnt vmcnt(0), s_waitcnt_vscnt null, 0x0, or s_wait_storecnt 0x0)
;     and then invalidates caches (buffer_wbinvl1[_vol], buffer_gl1_inv followed
;     by buffer_gl0_inv, buffer_invl2 + buffer_wbinvl1_vol, buffer_inv sc0 sc1,
;     or global_inv scope:SCOPE_SYS).
;   * The run that passes -amdgcn-skip-cache-invalidations keeps the wait but
;     expects no invalidate instruction before s_endpgm.
;
; NOTE(review): the assertions are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
14518define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg(
14519; GFX6-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14520; GFX6:       ; %bb.0: ; %entry
14521; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
14522; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
14523; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
14524; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
14525; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
14526; GFX6-NEXT:    s_mov_b32 s12, s5
14527; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
14528; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
14529; GFX6-NEXT:    s_mov_b32 s11, -1
14530; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
14531; GFX6-NEXT:    s_mov_b32 s5, s12
14532; GFX6-NEXT:    s_mov_b32 s6, s11
14533; GFX6-NEXT:    s_mov_b32 s7, s10
14534; GFX6-NEXT:    v_mov_b32_e32 v0, s9
14535; GFX6-NEXT:    v_mov_b32_e32 v2, s8
14536; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14537; GFX6-NEXT:    v_mov_b32_e32 v1, v2
14538; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14539; GFX6-NEXT:    s_waitcnt vmcnt(0)
14540; GFX6-NEXT:    buffer_wbinvl1
14541; GFX6-NEXT:    s_endpgm
14542;
14543; GFX7-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14544; GFX7:       ; %bb.0: ; %entry
14545; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
14546; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
14547; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
14548; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
14549; GFX7-NEXT:    s_mov_b64 s[10:11], 16
14550; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
14551; GFX7-NEXT:    s_mov_b32 s4, s8
14552; GFX7-NEXT:    s_mov_b32 s5, s9
14553; GFX7-NEXT:    s_mov_b32 s9, s10
14554; GFX7-NEXT:    s_mov_b32 s8, s11
14555; GFX7-NEXT:    s_add_u32 s4, s4, s9
14556; GFX7-NEXT:    s_addc_u32 s8, s5, s8
14557; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
14558; GFX7-NEXT:    s_mov_b32 s5, s8
14559; GFX7-NEXT:    v_mov_b32_e32 v2, s7
14560; GFX7-NEXT:    v_mov_b32_e32 v0, s6
14561; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14562; GFX7-NEXT:    v_mov_b32_e32 v3, v0
14563; GFX7-NEXT:    v_mov_b32_e32 v0, s4
14564; GFX7-NEXT:    v_mov_b32_e32 v1, s5
14565; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
14566; GFX7-NEXT:    s_waitcnt vmcnt(0)
14567; GFX7-NEXT:    buffer_wbinvl1_vol
14568; GFX7-NEXT:    s_endpgm
14569;
14570; GFX10-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14571; GFX10-WGP:       ; %bb.0: ; %entry
14572; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
14573; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14574; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
14575; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
14576; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14577; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
14578; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
14579; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14580; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
14581; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14582; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14583; GFX10-WGP-NEXT:    buffer_gl1_inv
14584; GFX10-WGP-NEXT:    buffer_gl0_inv
14585; GFX10-WGP-NEXT:    s_endpgm
14586;
14587; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14588; GFX10-CU:       ; %bb.0: ; %entry
14589; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
14590; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14591; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
14592; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
14593; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14594; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
14595; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
14596; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14597; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
14598; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14599; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14600; GFX10-CU-NEXT:    buffer_gl1_inv
14601; GFX10-CU-NEXT:    buffer_gl0_inv
14602; GFX10-CU-NEXT:    s_endpgm
14603;
14604; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14605; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14606; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
14607; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
14608; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
14609; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
14610; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14611; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
14612; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14613; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
14614; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
14615; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14616; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
14617; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
14618; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
14619; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
14620; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
14621; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14622; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
14623; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14624; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14625; SKIP-CACHE-INV-NEXT:    s_endpgm
14626;
14627; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14628; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14629; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14630; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14631; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14632; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14633; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14634; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14635; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14636; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14637; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14638; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14639; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14640; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
14641; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
14642; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14643;
14644; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14645; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
14646; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14647; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14648; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14649; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14650; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14651; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14652; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14653; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14654; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14655; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14656; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14657; GFX90A-TGSPLIT-NEXT:    buffer_invl2
14658; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
14659; GFX90A-TGSPLIT-NEXT:    s_endpgm
14660;
14661; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14662; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
14663; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14664; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14665; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14666; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14667; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14668; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14669; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14670; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14671; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14672; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
14673; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14674; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
14675; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
14676;
14677; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14678; GFX940-TGSPLIT:       ; %bb.0: ; %entry
14679; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14680; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14681; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14682; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14683; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14684; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14685; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14686; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14687; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14688; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
14689; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14690; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
14691; GFX940-TGSPLIT-NEXT:    s_endpgm
14692;
14693; GFX11-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14694; GFX11-WGP:       ; %bb.0: ; %entry
14695; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
14696; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14697; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14698; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14699; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14700; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
14701; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
14702; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14703; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
14704; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14705; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14706; GFX11-WGP-NEXT:    buffer_gl1_inv
14707; GFX11-WGP-NEXT:    buffer_gl0_inv
14708; GFX11-WGP-NEXT:    s_endpgm
14709;
14710; GFX11-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14711; GFX11-CU:       ; %bb.0: ; %entry
14712; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
14713; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14714; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14715; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14716; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
14717; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
14718; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
14719; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14720; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
14721; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14722; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14723; GFX11-CU-NEXT:    buffer_gl1_inv
14724; GFX11-CU-NEXT:    buffer_gl0_inv
14725; GFX11-CU-NEXT:    s_endpgm
14726;
14727; GFX12-WGP-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14728; GFX12-WGP:       ; %bb.0: ; %entry
14729; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
14730; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14731; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14732; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14733; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
14734; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
14735; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
14736; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14737; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
14738; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
14739; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
14740; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
14741; GFX12-WGP-NEXT:    s_endpgm
14742;
14743; GFX12-CU-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
14744; GFX12-CU:       ; %bb.0: ; %entry
14745; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
14746; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14747; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14748; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14749; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
14750; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
14751; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
14752; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14753; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
14754; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
14755; GFX12-CU-NEXT:    s_wait_storecnt 0x0
14756; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
14757; GFX12-CU-NEXT:    s_endpgm
14758    ptr addrspace(1) %out, i32 %in, i32 %old) {
14759entry:
  ; %gep addresses the i32 at index 4 of %out, i.e. 16 bytes past %out. The
  ; expected output shows this as "offset:16" on the atomic, or as an explicit
  ; 64-bit add of 16 in the sequence that uses flat_atomic_cmpswap.
14760  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and store %in on a match; the result %val is never
  ; used, only the generated synchronization sequence is under test.
14761  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
14762  ret void
14763}
14764
; Kernel under test: a volatile 32-bit cmpxchg on addrspace(1) memory using
; syncscope("one-as"), with success ordering release and failure ordering
; acquire. The operation targets element 4 of %out, which appears in the
; expected code below as a 16-byte offset (or an explicit 64-bit add of 16).
; %old is the value compared against and %in is the replacement value; the
; result of the cmpxchg is not used.
;
; In every expected sequence a wait (plus a cache write-back on some targets)
; precedes the atomic, and a wait followed by a cache invalidate comes after
; it. The one exception is the run that passes -amdgcn-skip-cache-invalidations,
; where no invalidate is expected.
;
; The per-target assertion lines interleaved with this definition are produced
; by utils/update_llc_test_checks.py; regenerate them with that script rather
; than editing them by hand.
14765define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg(
14766; GFX6-LABEL: global_system_one_as_release_acquire_cmpxchg:
14767; GFX6:       ; %bb.0: ; %entry
14768; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
14769; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
14770; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
14771; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
14772; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
14773; GFX6-NEXT:    s_mov_b32 s12, s5
14774; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
14775; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
14776; GFX6-NEXT:    s_mov_b32 s11, -1
14777; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
14778; GFX6-NEXT:    s_mov_b32 s5, s12
14779; GFX6-NEXT:    s_mov_b32 s6, s11
14780; GFX6-NEXT:    s_mov_b32 s7, s10
14781; GFX6-NEXT:    v_mov_b32_e32 v0, s9
14782; GFX6-NEXT:    v_mov_b32_e32 v2, s8
14783; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14784; GFX6-NEXT:    v_mov_b32_e32 v1, v2
14785; GFX6-NEXT:    s_waitcnt vmcnt(0)
14786; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
14787; GFX6-NEXT:    s_waitcnt vmcnt(0)
14788; GFX6-NEXT:    buffer_wbinvl1
14789; GFX6-NEXT:    s_endpgm
14790;
14791; GFX7-LABEL: global_system_one_as_release_acquire_cmpxchg:
14792; GFX7:       ; %bb.0: ; %entry
14793; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
14794; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
14795; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
14796; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
14797; GFX7-NEXT:    s_mov_b64 s[10:11], 16
14798; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
14799; GFX7-NEXT:    s_mov_b32 s4, s8
14800; GFX7-NEXT:    s_mov_b32 s5, s9
14801; GFX7-NEXT:    s_mov_b32 s9, s10
14802; GFX7-NEXT:    s_mov_b32 s8, s11
14803; GFX7-NEXT:    s_add_u32 s4, s4, s9
14804; GFX7-NEXT:    s_addc_u32 s8, s5, s8
14805; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
14806; GFX7-NEXT:    s_mov_b32 s5, s8
14807; GFX7-NEXT:    v_mov_b32_e32 v2, s7
14808; GFX7-NEXT:    v_mov_b32_e32 v0, s6
14809; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14810; GFX7-NEXT:    v_mov_b32_e32 v3, v0
14811; GFX7-NEXT:    v_mov_b32_e32 v0, s4
14812; GFX7-NEXT:    v_mov_b32_e32 v1, s5
14813; GFX7-NEXT:    s_waitcnt vmcnt(0)
14814; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
14815; GFX7-NEXT:    s_waitcnt vmcnt(0)
14816; GFX7-NEXT:    buffer_wbinvl1_vol
14817; GFX7-NEXT:    s_endpgm
14818;
14819; GFX10-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg:
14820; GFX10-WGP:       ; %bb.0: ; %entry
14821; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
14822; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14823; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
14824; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
14825; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14826; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
14827; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
14828; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14829; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
14830; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
14831; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14832; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14833; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14834; GFX10-WGP-NEXT:    buffer_gl1_inv
14835; GFX10-WGP-NEXT:    buffer_gl0_inv
14836; GFX10-WGP-NEXT:    s_endpgm
14837;
14838; GFX10-CU-LABEL: global_system_one_as_release_acquire_cmpxchg:
14839; GFX10-CU:       ; %bb.0: ; %entry
14840; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
14841; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14842; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
14843; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
14844; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
14845; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
14846; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
14847; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14848; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
14849; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
14850; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14851; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
14852; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14853; GFX10-CU-NEXT:    buffer_gl1_inv
14854; GFX10-CU-NEXT:    buffer_gl0_inv
14855; GFX10-CU-NEXT:    s_endpgm
14856;
14857; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_cmpxchg:
14858; SKIP-CACHE-INV:       ; %bb.0: ; %entry
14859; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
14860; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
14861; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
14862; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
14863; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
14864; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
14865; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
14866; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
14867; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
14868; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
14869; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
14870; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
14871; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
14872; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
14873; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
14874; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
14875; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
14876; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14877; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
14878; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
14879; SKIP-CACHE-INV-NEXT:    s_endpgm
14880;
14881; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg:
14882; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
14883; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14884; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14885; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14886; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14887; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14888; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14889; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14890; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14891; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14892; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
14893; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14894; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14895; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14896; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
14897; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
14898; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
14899;
14900; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg:
14901; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
14902; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14903; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
14904; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
14905; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
14906; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14907; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
14908; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
14909; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14910; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14911; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
14912; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14913; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
14914; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14915; GFX90A-TGSPLIT-NEXT:    buffer_invl2
14916; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
14917; GFX90A-TGSPLIT-NEXT:    s_endpgm
14918;
14919; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg:
14920; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
14921; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14922; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14923; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14924; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14925; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14926; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14927; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14928; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14929; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14930; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
14931; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14932; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
14933; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14934; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
14935; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
14936;
14937; GFX940-TGSPLIT-LABEL: global_system_one_as_release_acquire_cmpxchg:
14938; GFX940-TGSPLIT:       ; %bb.0: ; %entry
14939; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
14940; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
14941; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
14942; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
14943; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
14944; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
14945; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
14946; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
14947; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
14948; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
14949; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14950; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
14951; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
14952; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
14953; GFX940-TGSPLIT-NEXT:    s_endpgm
14954;
14955; GFX11-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg:
14956; GFX11-WGP:       ; %bb.0: ; %entry
14957; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
14958; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14959; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14960; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14961; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
14962; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
14963; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
14964; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14965; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
14966; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
14967; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14968; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14969; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
14970; GFX11-WGP-NEXT:    buffer_gl1_inv
14971; GFX11-WGP-NEXT:    buffer_gl0_inv
14972; GFX11-WGP-NEXT:    s_endpgm
14973;
14974; GFX11-CU-LABEL: global_system_one_as_release_acquire_cmpxchg:
14975; GFX11-CU:       ; %bb.0: ; %entry
14976; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
14977; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14978; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
14979; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
14980; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
14981; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
14982; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
14983; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
14984; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
14985; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
14986; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14987; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
14988; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
14989; GFX11-CU-NEXT:    buffer_gl1_inv
14990; GFX11-CU-NEXT:    buffer_gl0_inv
14991; GFX11-CU-NEXT:    s_endpgm
14992;
14993; GFX12-WGP-LABEL: global_system_one_as_release_acquire_cmpxchg:
14994; GFX12-WGP:       ; %bb.0: ; %entry
14995; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
14996; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
14997; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
14998; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
14999; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
15000; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
15001; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
15002; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15003; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
15004; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
15005; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
15006; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
15007; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
15008; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15009; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
15010; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15011; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
15012; GFX12-WGP-NEXT:    s_endpgm
15013;
15014; GFX12-CU-LABEL: global_system_one_as_release_acquire_cmpxchg:
15015; GFX12-CU:       ; %bb.0: ; %entry
15016; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
15017; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15018; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15019; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15020; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
15021; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
15022; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
15023; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15024; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
15025; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
15026; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
15027; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
15028; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
15029; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15030; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
15031; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15032; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
15033; GFX12-CU-NEXT:    s_endpgm
15034    ptr addrspace(1) %out, i32 %in, i32 %old) {
15035entry:
  ; An i32 index of 4 selects &out[4], i.e. 16 bytes past %out.
15036  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory word against %old and store %in on a match. The success
  ; ordering is release and the failure ordering is acquire; %val is never read.
15037  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release acquire
15038  ret void
15039}
15040
; Kernel under test: a volatile 32-bit cmpxchg on addrspace(1) memory using
; syncscope("one-as"), with success ordering acq_rel and failure ordering
; acquire. The operation targets element 4 of %out, which appears in the
; expected code below as a 16-byte offset (or an explicit 64-bit add of 16).
; %old is the value compared against and %in is the replacement value; the
; result of the cmpxchg is not used.
;
; In every expected sequence a wait (plus a cache write-back on some targets)
; precedes the atomic, and a wait followed by a cache invalidate comes after
; it. The one exception is the run that passes -amdgcn-skip-cache-invalidations,
; where no invalidate is expected.
;
; The per-target assertion lines interleaved with this definition are produced
; by utils/update_llc_test_checks.py; regenerate them with that script rather
; than editing them by hand.
15041define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg(
15042; GFX6-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15043; GFX6:       ; %bb.0: ; %entry
15044; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
15045; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
15046; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
15047; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
15048; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
15049; GFX6-NEXT:    s_mov_b32 s12, s5
15050; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
15051; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
15052; GFX6-NEXT:    s_mov_b32 s11, -1
15053; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
15054; GFX6-NEXT:    s_mov_b32 s5, s12
15055; GFX6-NEXT:    s_mov_b32 s6, s11
15056; GFX6-NEXT:    s_mov_b32 s7, s10
15057; GFX6-NEXT:    v_mov_b32_e32 v0, s9
15058; GFX6-NEXT:    v_mov_b32_e32 v2, s8
15059; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15060; GFX6-NEXT:    v_mov_b32_e32 v1, v2
15061; GFX6-NEXT:    s_waitcnt vmcnt(0)
15062; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15063; GFX6-NEXT:    s_waitcnt vmcnt(0)
15064; GFX6-NEXT:    buffer_wbinvl1
15065; GFX6-NEXT:    s_endpgm
15066;
15067; GFX7-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15068; GFX7:       ; %bb.0: ; %entry
15069; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
15070; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
15071; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
15072; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
15073; GFX7-NEXT:    s_mov_b64 s[10:11], 16
15074; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
15075; GFX7-NEXT:    s_mov_b32 s4, s8
15076; GFX7-NEXT:    s_mov_b32 s5, s9
15077; GFX7-NEXT:    s_mov_b32 s9, s10
15078; GFX7-NEXT:    s_mov_b32 s8, s11
15079; GFX7-NEXT:    s_add_u32 s4, s4, s9
15080; GFX7-NEXT:    s_addc_u32 s8, s5, s8
15081; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
15082; GFX7-NEXT:    s_mov_b32 s5, s8
15083; GFX7-NEXT:    v_mov_b32_e32 v2, s7
15084; GFX7-NEXT:    v_mov_b32_e32 v0, s6
15085; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15086; GFX7-NEXT:    v_mov_b32_e32 v3, v0
15087; GFX7-NEXT:    v_mov_b32_e32 v0, s4
15088; GFX7-NEXT:    v_mov_b32_e32 v1, s5
15089; GFX7-NEXT:    s_waitcnt vmcnt(0)
15090; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
15091; GFX7-NEXT:    s_waitcnt vmcnt(0)
15092; GFX7-NEXT:    buffer_wbinvl1_vol
15093; GFX7-NEXT:    s_endpgm
15094;
15095; GFX10-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15096; GFX10-WGP:       ; %bb.0: ; %entry
15097; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
15098; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15099; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
15100; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
15101; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15102; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
15103; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
15104; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15105; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
15106; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
15107; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15108; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15109; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15110; GFX10-WGP-NEXT:    buffer_gl1_inv
15111; GFX10-WGP-NEXT:    buffer_gl0_inv
15112; GFX10-WGP-NEXT:    s_endpgm
15113;
15114; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15115; GFX10-CU:       ; %bb.0: ; %entry
15116; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
15117; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15118; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
15119; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
15120; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
15121; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
15122; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
15123; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15124; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
15125; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
15126; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15127; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15128; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15129; GFX10-CU-NEXT:    buffer_gl1_inv
15130; GFX10-CU-NEXT:    buffer_gl0_inv
15131; GFX10-CU-NEXT:    s_endpgm
15132;
15133; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15134; SKIP-CACHE-INV:       ; %bb.0: ; %entry
15135; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
15136; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
15137; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
15138; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
15139; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
15140; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
15141; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
15142; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
15143; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
15144; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
15145; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
15146; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
15147; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
15148; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
15149; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
15150; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15151; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
15152; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15153; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15154; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15155; SKIP-CACHE-INV-NEXT:    s_endpgm
15156;
15157; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15158; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
15159; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15160; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15161; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15162; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15163; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15164; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15165; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15166; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15167; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15168; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
15169; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15170; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15171; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15172; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
15173; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
15174; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
15175;
15176; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15177; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
15178; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15179; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15180; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15181; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15182; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15183; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15184; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15185; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15186; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15187; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
15188; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15189; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15190; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15191; GFX90A-TGSPLIT-NEXT:    buffer_invl2
15192; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
15193; GFX90A-TGSPLIT-NEXT:    s_endpgm
15194;
15195; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15196; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
15197; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15198; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15199; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15200; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15201; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15202; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15203; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15204; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15205; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15206; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
15207; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15208; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
15209; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15210; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
15211; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
15212;
15213; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15214; GFX940-TGSPLIT:       ; %bb.0: ; %entry
15215; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15216; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15217; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15218; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15219; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15220; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15221; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15222; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15223; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15224; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
15225; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15226; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
15227; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15228; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
15229; GFX940-TGSPLIT-NEXT:    s_endpgm
15230;
15231; GFX11-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15232; GFX11-WGP:       ; %bb.0: ; %entry
15233; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
15234; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15235; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15236; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15237; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15238; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
15239; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
15240; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15241; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
15242; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
15243; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15244; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15245; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15246; GFX11-WGP-NEXT:    buffer_gl1_inv
15247; GFX11-WGP-NEXT:    buffer_gl0_inv
15248; GFX11-WGP-NEXT:    s_endpgm
15249;
15250; GFX11-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15251; GFX11-CU:       ; %bb.0: ; %entry
15252; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
15253; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15254; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15255; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15256; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
15257; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
15258; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
15259; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15260; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
15261; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
15262; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15263; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15264; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15265; GFX11-CU-NEXT:    buffer_gl1_inv
15266; GFX11-CU-NEXT:    buffer_gl0_inv
15267; GFX11-CU-NEXT:    s_endpgm
15268;
15269; GFX12-WGP-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15270; GFX12-WGP:       ; %bb.0: ; %entry
15271; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
15272; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15273; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15274; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15275; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
15276; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
15277; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
15278; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15279; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
15280; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
15281; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
15282; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
15283; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
15284; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15285; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
15286; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15287; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
15288; GFX12-WGP-NEXT:    s_endpgm
15289;
15290; GFX12-CU-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
15291; GFX12-CU:       ; %bb.0: ; %entry
15292; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
15293; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15294; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15295; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15296; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
15297; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
15298; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
15299; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15300; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
15301; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
15302; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
15303; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
15304; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
15305; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15306; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
15307; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15308; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
15309; GFX12-CU-NEXT:    s_endpgm
15310    ptr addrspace(1) %out, i32 %in, i32 %old) {
15311entry:
  ; An i32 index of 4 selects &out[4], i.e. 16 bytes past %out.
15312  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory word against %old and store %in on a match. The success
  ; ordering is acq_rel and the failure ordering is acquire; %val is never read.
15313  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
15314  ret void
15315}
15316
15317define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg(
15318; GFX6-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15319; GFX6:       ; %bb.0: ; %entry
15320; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
15321; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
15322; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
15323; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
15324; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
15325; GFX6-NEXT:    s_mov_b32 s12, s5
15326; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
15327; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
15328; GFX6-NEXT:    s_mov_b32 s11, -1
15329; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
15330; GFX6-NEXT:    s_mov_b32 s5, s12
15331; GFX6-NEXT:    s_mov_b32 s6, s11
15332; GFX6-NEXT:    s_mov_b32 s7, s10
15333; GFX6-NEXT:    v_mov_b32_e32 v0, s9
15334; GFX6-NEXT:    v_mov_b32_e32 v2, s8
15335; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15336; GFX6-NEXT:    v_mov_b32_e32 v1, v2
15337; GFX6-NEXT:    s_waitcnt vmcnt(0)
15338; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15339; GFX6-NEXT:    s_waitcnt vmcnt(0)
15340; GFX6-NEXT:    buffer_wbinvl1
15341; GFX6-NEXT:    s_endpgm
15342;
15343; GFX7-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15344; GFX7:       ; %bb.0: ; %entry
15345; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
15346; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
15347; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
15348; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
15349; GFX7-NEXT:    s_mov_b64 s[10:11], 16
15350; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
15351; GFX7-NEXT:    s_mov_b32 s4, s8
15352; GFX7-NEXT:    s_mov_b32 s5, s9
15353; GFX7-NEXT:    s_mov_b32 s9, s10
15354; GFX7-NEXT:    s_mov_b32 s8, s11
15355; GFX7-NEXT:    s_add_u32 s4, s4, s9
15356; GFX7-NEXT:    s_addc_u32 s8, s5, s8
15357; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
15358; GFX7-NEXT:    s_mov_b32 s5, s8
15359; GFX7-NEXT:    v_mov_b32_e32 v2, s7
15360; GFX7-NEXT:    v_mov_b32_e32 v0, s6
15361; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15362; GFX7-NEXT:    v_mov_b32_e32 v3, v0
15363; GFX7-NEXT:    v_mov_b32_e32 v0, s4
15364; GFX7-NEXT:    v_mov_b32_e32 v1, s5
15365; GFX7-NEXT:    s_waitcnt vmcnt(0)
15366; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
15367; GFX7-NEXT:    s_waitcnt vmcnt(0)
15368; GFX7-NEXT:    buffer_wbinvl1_vol
15369; GFX7-NEXT:    s_endpgm
15370;
15371; GFX10-WGP-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15372; GFX10-WGP:       ; %bb.0: ; %entry
15373; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
15374; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15375; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
15376; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
15377; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15378; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
15379; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
15380; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15381; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
15382; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
15383; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15384; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15385; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15386; GFX10-WGP-NEXT:    buffer_gl1_inv
15387; GFX10-WGP-NEXT:    buffer_gl0_inv
15388; GFX10-WGP-NEXT:    s_endpgm
15389;
15390; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15391; GFX10-CU:       ; %bb.0: ; %entry
15392; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
15393; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15394; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
15395; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
15396; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
15397; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
15398; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
15399; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15400; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
15401; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
15402; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15403; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15404; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15405; GFX10-CU-NEXT:    buffer_gl1_inv
15406; GFX10-CU-NEXT:    buffer_gl0_inv
15407; GFX10-CU-NEXT:    s_endpgm
15408;
15409; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15410; SKIP-CACHE-INV:       ; %bb.0: ; %entry
15411; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
15412; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
15413; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
15414; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
15415; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
15416; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
15417; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
15418; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
15419; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
15420; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
15421; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
15422; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
15423; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
15424; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
15425; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
15426; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15427; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
15428; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15429; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15430; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15431; SKIP-CACHE-INV-NEXT:    s_endpgm
15432;
15433; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15434; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
15435; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15436; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15437; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15438; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15439; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15440; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15441; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15442; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15443; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15444; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
15445; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15446; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15447; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15448; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
15449; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
15450; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
15451;
15452; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15453; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
15454; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15455; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15456; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15457; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15458; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15459; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15460; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15461; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15462; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15463; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
15464; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15465; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15466; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15467; GFX90A-TGSPLIT-NEXT:    buffer_invl2
15468; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
15469; GFX90A-TGSPLIT-NEXT:    s_endpgm
15470;
15471; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15472; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
15473; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15474; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15475; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15476; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15477; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15478; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15479; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15480; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15481; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15482; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
15483; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15484; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
15485; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15486; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
15487; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
15488;
15489; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15490; GFX940-TGSPLIT:       ; %bb.0: ; %entry
15491; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15492; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15493; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15494; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15495; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15496; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15497; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15498; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15499; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15500; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
15501; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15502; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
15503; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15504; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
15505; GFX940-TGSPLIT-NEXT:    s_endpgm
15506;
15507; GFX11-WGP-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15508; GFX11-WGP:       ; %bb.0: ; %entry
15509; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
15510; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15511; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15512; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15513; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15514; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
15515; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
15516; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15517; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
15518; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
15519; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15520; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15521; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15522; GFX11-WGP-NEXT:    buffer_gl1_inv
15523; GFX11-WGP-NEXT:    buffer_gl0_inv
15524; GFX11-WGP-NEXT:    s_endpgm
15525;
15526; GFX11-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15527; GFX11-CU:       ; %bb.0: ; %entry
15528; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
15529; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15530; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15531; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15532; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
15533; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
15534; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
15535; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15536; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
15537; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
15538; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15539; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15540; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15541; GFX11-CU-NEXT:    buffer_gl1_inv
15542; GFX11-CU-NEXT:    buffer_gl0_inv
15543; GFX11-CU-NEXT:    s_endpgm
15544;
15545; GFX12-WGP-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15546; GFX12-WGP:       ; %bb.0: ; %entry
15547; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
15548; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15549; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15550; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15551; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
15552; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
15553; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
15554; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15555; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
15556; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
15557; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
15558; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
15559; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
15560; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15561; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
15562; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15563; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
15564; GFX12-WGP-NEXT:    s_endpgm
15565;
15566; GFX12-CU-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
15567; GFX12-CU:       ; %bb.0: ; %entry
15568; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
15569; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15570; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15571; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15572; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
15573; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
15574; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
15575; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15576; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
15577; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
15578; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
15579; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
15580; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
15581; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15582; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
15583; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15584; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
15585; GFX12-CU-NEXT:    s_endpgm
15586    ptr addrspace(1) %out, i32 %in, i32 %old) {
15587entry:
15588  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
15589  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
15590  ret void
15591}
15592
; Kernel under test: a volatile 32-bit cmpxchg on a global (addrspace(1))
; pointer with syncscope("one-as"), success ordering monotonic and failure
; ordering seq_cst. The target is element 4 of %out viewed as i32, which is why
; the expected instructions below use a 16-byte offset (an offset:16 operand,
; or an explicit scalar add of 16 where a flat atomic is expected). %old is the
; compare value and %in the replacement; the result (%val) is never used.
;
; For every llc configuration in the file header, the expected sequence brackets
; the cmpswap with wait instructions, and all but the invocation that passes
; -amdgcn-skip-cache-invalidations also expect an invalidate instruction
; (buffer_wbinvl1*, buffer_gl*_inv, buffer_inv or global_inv) after it.
;
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
15593define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg(
15594; GFX6-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15595; GFX6:       ; %bb.0: ; %entry
15596; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
15597; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
15598; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
15599; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
15600; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
15601; GFX6-NEXT:    s_mov_b32 s12, s5
15602; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
15603; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
15604; GFX6-NEXT:    s_mov_b32 s11, -1
15605; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
15606; GFX6-NEXT:    s_mov_b32 s5, s12
15607; GFX6-NEXT:    s_mov_b32 s6, s11
15608; GFX6-NEXT:    s_mov_b32 s7, s10
15609; GFX6-NEXT:    v_mov_b32_e32 v0, s9
15610; GFX6-NEXT:    v_mov_b32_e32 v2, s8
15611; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15612; GFX6-NEXT:    v_mov_b32_e32 v1, v2
15613; GFX6-NEXT:    s_waitcnt vmcnt(0)
15614; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15615; GFX6-NEXT:    s_waitcnt vmcnt(0)
15616; GFX6-NEXT:    buffer_wbinvl1
15617; GFX6-NEXT:    s_endpgm
15618;
15619; GFX7-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15620; GFX7:       ; %bb.0: ; %entry
15621; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
15622; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
15623; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
15624; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
15625; GFX7-NEXT:    s_mov_b64 s[10:11], 16
15626; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
15627; GFX7-NEXT:    s_mov_b32 s4, s8
15628; GFX7-NEXT:    s_mov_b32 s5, s9
15629; GFX7-NEXT:    s_mov_b32 s9, s10
15630; GFX7-NEXT:    s_mov_b32 s8, s11
15631; GFX7-NEXT:    s_add_u32 s4, s4, s9
15632; GFX7-NEXT:    s_addc_u32 s8, s5, s8
15633; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
15634; GFX7-NEXT:    s_mov_b32 s5, s8
15635; GFX7-NEXT:    v_mov_b32_e32 v2, s7
15636; GFX7-NEXT:    v_mov_b32_e32 v0, s6
15637; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15638; GFX7-NEXT:    v_mov_b32_e32 v3, v0
15639; GFX7-NEXT:    v_mov_b32_e32 v0, s4
15640; GFX7-NEXT:    v_mov_b32_e32 v1, s5
15641; GFX7-NEXT:    s_waitcnt vmcnt(0)
15642; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
15643; GFX7-NEXT:    s_waitcnt vmcnt(0)
15644; GFX7-NEXT:    buffer_wbinvl1_vol
15645; GFX7-NEXT:    s_endpgm
15646;
15647; GFX10-WGP-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15648; GFX10-WGP:       ; %bb.0: ; %entry
15649; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
15650; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15651; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
15652; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
15653; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15654; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
15655; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
15656; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15657; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
15658; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
15659; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15660; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15661; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15662; GFX10-WGP-NEXT:    buffer_gl1_inv
15663; GFX10-WGP-NEXT:    buffer_gl0_inv
15664; GFX10-WGP-NEXT:    s_endpgm
15665;
15666; GFX10-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15667; GFX10-CU:       ; %bb.0: ; %entry
15668; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
15669; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15670; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
15671; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
15672; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
15673; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
15674; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
15675; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15676; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
15677; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
15678; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15679; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15680; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15681; GFX10-CU-NEXT:    buffer_gl1_inv
15682; GFX10-CU-NEXT:    buffer_gl0_inv
15683; GFX10-CU-NEXT:    s_endpgm
15684;
15685; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15686; SKIP-CACHE-INV:       ; %bb.0: ; %entry
15687; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
15688; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
15689; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
15690; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
15691; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
15692; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
15693; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
15694; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
15695; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
15696; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
15697; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
15698; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
15699; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
15700; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
15701; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
15702; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15703; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
15704; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15705; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15706; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15707; SKIP-CACHE-INV-NEXT:    s_endpgm
15708;
15709; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15710; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
15711; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15712; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15713; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15714; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15715; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15716; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15717; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15718; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15719; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15720; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
15721; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15722; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15723; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15724; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
15725; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
15726; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
15727;
15728; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15729; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
15730; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15731; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15732; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15733; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15734; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15735; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15736; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15737; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15738; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15739; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
15740; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15741; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15742; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15743; GFX90A-TGSPLIT-NEXT:    buffer_invl2
15744; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
15745; GFX90A-TGSPLIT-NEXT:    s_endpgm
15746;
15747; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15748; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
15749; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15750; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15751; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15752; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15753; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15754; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15755; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15756; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15757; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15758; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
15759; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15760; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
15761; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15762; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
15763; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
15764;
15765; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15766; GFX940-TGSPLIT:       ; %bb.0: ; %entry
15767; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15768; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
15769; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
15770; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
15771; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15772; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
15773; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
15774; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15775; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15776; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
15777; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15778; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
15779; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15780; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
15781; GFX940-TGSPLIT-NEXT:    s_endpgm
15782;
15783; GFX11-WGP-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15784; GFX11-WGP:       ; %bb.0: ; %entry
15785; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
15786; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15787; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15788; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15789; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15790; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
15791; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
15792; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15793; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
15794; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
15795; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15796; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15797; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15798; GFX11-WGP-NEXT:    buffer_gl1_inv
15799; GFX11-WGP-NEXT:    buffer_gl0_inv
15800; GFX11-WGP-NEXT:    s_endpgm
15801;
15802; GFX11-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15803; GFX11-CU:       ; %bb.0: ; %entry
15804; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
15805; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15806; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15807; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15808; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
15809; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
15810; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
15811; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15812; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
15813; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
15814; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15815; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
15816; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15817; GFX11-CU-NEXT:    buffer_gl1_inv
15818; GFX11-CU-NEXT:    buffer_gl0_inv
15819; GFX11-CU-NEXT:    s_endpgm
15820;
15821; GFX12-WGP-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15822; GFX12-WGP:       ; %bb.0: ; %entry
15823; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
15824; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15825; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
15826; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
15827; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
15828; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
15829; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
15830; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15831; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
15832; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
15833; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
15834; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
15835; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
15836; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15837; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
15838; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
15839; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
15840; GFX12-WGP-NEXT:    s_endpgm
15841;
15842; GFX12-CU-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
15843; GFX12-CU:       ; %bb.0: ; %entry
15844; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
15845; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
15846; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
15847; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
15848; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
15849; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
15850; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
15851; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15852; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
15853; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
15854; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
15855; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
15856; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
15857; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15858; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
15859; GFX12-CU-NEXT:    s_wait_storecnt 0x0
15860; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
15861; GFX12-CU-NEXT:    s_endpgm
15862    ptr addrspace(1) %out, i32 %in, i32 %old) {
15863entry:
  ; Address of the i32 at index 4 of %out, i.e. 16 bytes past the base pointer.
15864  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and store %in on a match; the two orderings are given
  ; as success (monotonic) then failure (seq_cst).
15865  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst
15866  ret void
15867}
15868
15869define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg(
15870; GFX6-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
15871; GFX6:       ; %bb.0: ; %entry
15872; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
15873; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
15874; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
15875; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
15876; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
15877; GFX6-NEXT:    s_mov_b32 s12, s5
15878; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
15879; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
15880; GFX6-NEXT:    s_mov_b32 s11, -1
15881; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
15882; GFX6-NEXT:    s_mov_b32 s5, s12
15883; GFX6-NEXT:    s_mov_b32 s6, s11
15884; GFX6-NEXT:    s_mov_b32 s7, s10
15885; GFX6-NEXT:    v_mov_b32_e32 v0, s9
15886; GFX6-NEXT:    v_mov_b32_e32 v2, s8
15887; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15888; GFX6-NEXT:    v_mov_b32_e32 v1, v2
15889; GFX6-NEXT:    s_waitcnt vmcnt(0)
15890; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
15891; GFX6-NEXT:    s_waitcnt vmcnt(0)
15892; GFX6-NEXT:    buffer_wbinvl1
15893; GFX6-NEXT:    s_endpgm
15894;
15895; GFX7-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
15896; GFX7:       ; %bb.0: ; %entry
15897; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
15898; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
15899; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
15900; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
15901; GFX7-NEXT:    s_mov_b64 s[10:11], 16
15902; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
15903; GFX7-NEXT:    s_mov_b32 s4, s8
15904; GFX7-NEXT:    s_mov_b32 s5, s9
15905; GFX7-NEXT:    s_mov_b32 s9, s10
15906; GFX7-NEXT:    s_mov_b32 s8, s11
15907; GFX7-NEXT:    s_add_u32 s4, s4, s9
15908; GFX7-NEXT:    s_addc_u32 s8, s5, s8
15909; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
15910; GFX7-NEXT:    s_mov_b32 s5, s8
15911; GFX7-NEXT:    v_mov_b32_e32 v2, s7
15912; GFX7-NEXT:    v_mov_b32_e32 v0, s6
15913; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15914; GFX7-NEXT:    v_mov_b32_e32 v3, v0
15915; GFX7-NEXT:    v_mov_b32_e32 v0, s4
15916; GFX7-NEXT:    v_mov_b32_e32 v1, s5
15917; GFX7-NEXT:    s_waitcnt vmcnt(0)
15918; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
15919; GFX7-NEXT:    s_waitcnt vmcnt(0)
15920; GFX7-NEXT:    buffer_wbinvl1_vol
15921; GFX7-NEXT:    s_endpgm
15922;
15923; GFX10-WGP-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
15924; GFX10-WGP:       ; %bb.0: ; %entry
15925; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
15926; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15927; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
15928; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
15929; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
15930; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
15931; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
15932; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15933; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
15934; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
15935; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15936; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15937; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
15938; GFX10-WGP-NEXT:    buffer_gl1_inv
15939; GFX10-WGP-NEXT:    buffer_gl0_inv
15940; GFX10-WGP-NEXT:    s_endpgm
15941;
15942; GFX10-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
15943; GFX10-CU:       ; %bb.0: ; %entry
15944; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
15945; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15946; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
15947; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
15948; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
15949; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
15950; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
15951; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
15952; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
15953; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
15954; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15955; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
15956; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
15957; GFX10-CU-NEXT:    buffer_gl1_inv
15958; GFX10-CU-NEXT:    buffer_gl0_inv
15959; GFX10-CU-NEXT:    s_endpgm
15960;
15961; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
15962; SKIP-CACHE-INV:       ; %bb.0: ; %entry
15963; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
15964; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
15965; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
15966; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
15967; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
15968; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
15969; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
15970; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
15971; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
15972; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
15973; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
15974; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
15975; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
15976; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
15977; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
15978; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
15979; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
15980; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15981; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
15982; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
15983; SKIP-CACHE-INV-NEXT:    s_endpgm
15984;
15985; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
15986; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
15987; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
15988; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
15989; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
15990; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
15991; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
15992; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
15993; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
15994; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
15995; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
15996; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
15997; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
15998; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
15999; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16000; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
16001; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
16002; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
16003;
16004; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
16005; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
16006; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16007; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16008; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16009; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16010; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16011; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16012; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16013; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16014; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16015; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
16016; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16017; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16018; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16019; GFX90A-TGSPLIT-NEXT:    buffer_invl2
16020; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
16021; GFX90A-TGSPLIT-NEXT:    s_endpgm
16022;
16023; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
16024; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
16025; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16026; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16027; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16028; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16029; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16030; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16031; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16032; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16033; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16034; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
16035; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16036; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
16037; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16038; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
16039; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
16040;
16041; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
16042; GFX940-TGSPLIT:       ; %bb.0: ; %entry
16043; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16044; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16045; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16046; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16047; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16048; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16049; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16050; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16051; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16052; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
16053; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16054; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
16055; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16056; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
16057; GFX940-TGSPLIT-NEXT:    s_endpgm
16058;
16059; GFX11-WGP-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
16060; GFX11-WGP:       ; %bb.0: ; %entry
16061; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
16062; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16063; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16064; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16065; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16066; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
16067; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
16068; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16069; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
16070; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
16071; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16072; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16073; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16074; GFX11-WGP-NEXT:    buffer_gl1_inv
16075; GFX11-WGP-NEXT:    buffer_gl0_inv
16076; GFX11-WGP-NEXT:    s_endpgm
16077;
16078; GFX11-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
16079; GFX11-CU:       ; %bb.0: ; %entry
16080; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
16081; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16082; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16083; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16084; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
16085; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
16086; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
16087; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16088; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
16089; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
16090; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16091; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16092; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16093; GFX11-CU-NEXT:    buffer_gl1_inv
16094; GFX11-CU-NEXT:    buffer_gl0_inv
16095; GFX11-CU-NEXT:    s_endpgm
16096;
16097; GFX12-WGP-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
16098; GFX12-WGP:       ; %bb.0: ; %entry
16099; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
16100; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16101; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16102; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16103; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
16104; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
16105; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
16106; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16107; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
16108; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
16109; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
16110; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
16111; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
16112; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16113; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
16114; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16115; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
16116; GFX12-WGP-NEXT:    s_endpgm
16117;
16118; GFX12-CU-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
16119; GFX12-CU:       ; %bb.0: ; %entry
16120; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
16121; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16122; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16123; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16124; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
16125; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
16126; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
16127; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16128; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
16129; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
16130; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
16131; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
16132; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
16133; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16134; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
16135; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16136; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
16137; GFX12-CU-NEXT:    s_endpgm
16138    ptr addrspace(1) %out, i32 %in, i32 %old) {
16139entry:
16140  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
16141  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst
16142  ret void
16143}
16144
; Kernel under test - a volatile cmpxchg on a global (addrspace(1)) i32 located
; at element index 4 of %out, using syncscope("one-as") with success ordering
; release and failure ordering seq_cst. The cmpxchg result is not used.
; The per-target assertion lines inside this definition are autogenerated by
; utils/update_llc_test_checks.py (see the note on the first line of this file),
; so regenerate them with that script rather than editing them by hand.
16145define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg(
16146; GFX6-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16147; GFX6:       ; %bb.0: ; %entry
16148; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
16149; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
16150; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
16151; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
16152; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16153; GFX6-NEXT:    s_mov_b32 s12, s5
16154; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
16155; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
16156; GFX6-NEXT:    s_mov_b32 s11, -1
16157; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
16158; GFX6-NEXT:    s_mov_b32 s5, s12
16159; GFX6-NEXT:    s_mov_b32 s6, s11
16160; GFX6-NEXT:    s_mov_b32 s7, s10
16161; GFX6-NEXT:    v_mov_b32_e32 v0, s9
16162; GFX6-NEXT:    v_mov_b32_e32 v2, s8
16163; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16164; GFX6-NEXT:    v_mov_b32_e32 v1, v2
16165; GFX6-NEXT:    s_waitcnt vmcnt(0)
16166; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
16167; GFX6-NEXT:    s_waitcnt vmcnt(0)
16168; GFX6-NEXT:    buffer_wbinvl1
16169; GFX6-NEXT:    s_endpgm
16170;
16171; GFX7-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16172; GFX7:       ; %bb.0: ; %entry
16173; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
16174; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
16175; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
16176; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
16177; GFX7-NEXT:    s_mov_b64 s[10:11], 16
16178; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
16179; GFX7-NEXT:    s_mov_b32 s4, s8
16180; GFX7-NEXT:    s_mov_b32 s5, s9
16181; GFX7-NEXT:    s_mov_b32 s9, s10
16182; GFX7-NEXT:    s_mov_b32 s8, s11
16183; GFX7-NEXT:    s_add_u32 s4, s4, s9
16184; GFX7-NEXT:    s_addc_u32 s8, s5, s8
16185; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
16186; GFX7-NEXT:    s_mov_b32 s5, s8
16187; GFX7-NEXT:    v_mov_b32_e32 v2, s7
16188; GFX7-NEXT:    v_mov_b32_e32 v0, s6
16189; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16190; GFX7-NEXT:    v_mov_b32_e32 v3, v0
16191; GFX7-NEXT:    v_mov_b32_e32 v0, s4
16192; GFX7-NEXT:    v_mov_b32_e32 v1, s5
16193; GFX7-NEXT:    s_waitcnt vmcnt(0)
16194; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
16195; GFX7-NEXT:    s_waitcnt vmcnt(0)
16196; GFX7-NEXT:    buffer_wbinvl1_vol
16197; GFX7-NEXT:    s_endpgm
16198;
16199; GFX10-WGP-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16200; GFX10-WGP:       ; %bb.0: ; %entry
16201; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
16202; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16203; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
16204; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
16205; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16206; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
16207; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
16208; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16209; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
16210; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
16211; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16212; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16213; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16214; GFX10-WGP-NEXT:    buffer_gl1_inv
16215; GFX10-WGP-NEXT:    buffer_gl0_inv
16216; GFX10-WGP-NEXT:    s_endpgm
16217;
16218; GFX10-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16219; GFX10-CU:       ; %bb.0: ; %entry
16220; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
16221; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16222; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
16223; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
16224; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
16225; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
16226; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
16227; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16228; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
16229; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
16230; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16231; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16232; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16233; GFX10-CU-NEXT:    buffer_gl1_inv
16234; GFX10-CU-NEXT:    buffer_gl0_inv
16235; GFX10-CU-NEXT:    s_endpgm
16236;
16237; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16238; SKIP-CACHE-INV:       ; %bb.0: ; %entry
16239; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
16240; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
16241; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
16242; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
16243; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
16244; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
16245; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
16246; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
16247; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
16248; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
16249; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
16250; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
16251; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
16252; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
16253; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
16254; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16255; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
16256; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16257; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
16258; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16259; SKIP-CACHE-INV-NEXT:    s_endpgm
16260;
16261; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16262; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
16263; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16264; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16265; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16266; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16267; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16268; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16269; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16270; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16271; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16272; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
16273; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16274; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16275; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16276; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
16277; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
16278; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
16279;
16280; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16281; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
16282; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16283; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16284; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16285; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16286; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16287; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16288; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16289; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16290; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16291; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
16292; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16293; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16294; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16295; GFX90A-TGSPLIT-NEXT:    buffer_invl2
16296; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
16297; GFX90A-TGSPLIT-NEXT:    s_endpgm
16298;
16299; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16300; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
16301; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16302; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16303; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16304; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16305; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16306; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16307; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16308; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16309; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16310; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
16311; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16312; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
16313; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16314; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
16315; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
16316;
16317; GFX940-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16318; GFX940-TGSPLIT:       ; %bb.0: ; %entry
16319; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16320; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16321; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16322; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16323; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16324; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16325; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16326; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16327; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16328; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
16329; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16330; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
16331; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16332; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
16333; GFX940-TGSPLIT-NEXT:    s_endpgm
16334;
16335; GFX11-WGP-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16336; GFX11-WGP:       ; %bb.0: ; %entry
16337; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
16338; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16339; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16340; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16341; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16342; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
16343; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
16344; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16345; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
16346; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
16347; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16348; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16349; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16350; GFX11-WGP-NEXT:    buffer_gl1_inv
16351; GFX11-WGP-NEXT:    buffer_gl0_inv
16352; GFX11-WGP-NEXT:    s_endpgm
16353;
16354; GFX11-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16355; GFX11-CU:       ; %bb.0: ; %entry
16356; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
16357; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16358; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16359; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16360; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
16361; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
16362; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
16363; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16364; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
16365; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
16366; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16367; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16368; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16369; GFX11-CU-NEXT:    buffer_gl1_inv
16370; GFX11-CU-NEXT:    buffer_gl0_inv
16371; GFX11-CU-NEXT:    s_endpgm
16372;
16373; GFX12-WGP-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16374; GFX12-WGP:       ; %bb.0: ; %entry
16375; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
16376; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16377; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16378; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16379; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
16380; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
16381; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
16382; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16383; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
16384; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
16385; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
16386; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
16387; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
16388; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16389; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
16390; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16391; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
16392; GFX12-WGP-NEXT:    s_endpgm
16393;
16394; GFX12-CU-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
16395; GFX12-CU:       ; %bb.0: ; %entry
16396; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
16397; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16398; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16399; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16400; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
16401; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
16402; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
16403; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16404; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
16405; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
16406; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
16407; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
16408; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
16409; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16410; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
16411; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16412; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
16413; GFX12-CU-NEXT:    s_endpgm
16414    ptr addrspace(1) %out, i32 %in, i32 %old) {
16415entry:
; %gep points at the i32 element with index 4 relative to %out.
16416  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
; Compare the value at %gep against %old and store %in on a match; %val is
; never read, the kernel simply returns afterwards.
16417  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst
16418  ret void
16419}
16420
16421define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg(
16422; GFX6-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16423; GFX6:       ; %bb.0: ; %entry
16424; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
16425; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
16426; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
16427; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
16428; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16429; GFX6-NEXT:    s_mov_b32 s12, s5
16430; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
16431; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
16432; GFX6-NEXT:    s_mov_b32 s11, -1
16433; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
16434; GFX6-NEXT:    s_mov_b32 s5, s12
16435; GFX6-NEXT:    s_mov_b32 s6, s11
16436; GFX6-NEXT:    s_mov_b32 s7, s10
16437; GFX6-NEXT:    v_mov_b32_e32 v0, s9
16438; GFX6-NEXT:    v_mov_b32_e32 v2, s8
16439; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16440; GFX6-NEXT:    v_mov_b32_e32 v1, v2
16441; GFX6-NEXT:    s_waitcnt vmcnt(0)
16442; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
16443; GFX6-NEXT:    s_waitcnt vmcnt(0)
16444; GFX6-NEXT:    buffer_wbinvl1
16445; GFX6-NEXT:    s_endpgm
16446;
16447; GFX7-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16448; GFX7:       ; %bb.0: ; %entry
16449; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
16450; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
16451; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
16452; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
16453; GFX7-NEXT:    s_mov_b64 s[10:11], 16
16454; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
16455; GFX7-NEXT:    s_mov_b32 s4, s8
16456; GFX7-NEXT:    s_mov_b32 s5, s9
16457; GFX7-NEXT:    s_mov_b32 s9, s10
16458; GFX7-NEXT:    s_mov_b32 s8, s11
16459; GFX7-NEXT:    s_add_u32 s4, s4, s9
16460; GFX7-NEXT:    s_addc_u32 s8, s5, s8
16461; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
16462; GFX7-NEXT:    s_mov_b32 s5, s8
16463; GFX7-NEXT:    v_mov_b32_e32 v2, s7
16464; GFX7-NEXT:    v_mov_b32_e32 v0, s6
16465; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16466; GFX7-NEXT:    v_mov_b32_e32 v3, v0
16467; GFX7-NEXT:    v_mov_b32_e32 v0, s4
16468; GFX7-NEXT:    v_mov_b32_e32 v1, s5
16469; GFX7-NEXT:    s_waitcnt vmcnt(0)
16470; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
16471; GFX7-NEXT:    s_waitcnt vmcnt(0)
16472; GFX7-NEXT:    buffer_wbinvl1_vol
16473; GFX7-NEXT:    s_endpgm
16474;
16475; GFX10-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16476; GFX10-WGP:       ; %bb.0: ; %entry
16477; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
16478; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16479; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
16480; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
16481; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16482; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
16483; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
16484; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16485; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
16486; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
16487; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16488; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16489; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16490; GFX10-WGP-NEXT:    buffer_gl1_inv
16491; GFX10-WGP-NEXT:    buffer_gl0_inv
16492; GFX10-WGP-NEXT:    s_endpgm
16493;
16494; GFX10-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16495; GFX10-CU:       ; %bb.0: ; %entry
16496; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
16497; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16498; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
16499; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
16500; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
16501; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
16502; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
16503; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16504; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
16505; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
16506; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16507; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16508; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16509; GFX10-CU-NEXT:    buffer_gl1_inv
16510; GFX10-CU-NEXT:    buffer_gl0_inv
16511; GFX10-CU-NEXT:    s_endpgm
16512;
16513; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16514; SKIP-CACHE-INV:       ; %bb.0: ; %entry
16515; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
16516; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
16517; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
16518; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
16519; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
16520; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
16521; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
16522; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
16523; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
16524; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
16525; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
16526; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
16527; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
16528; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
16529; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
16530; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16531; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
16532; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16533; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
16534; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16535; SKIP-CACHE-INV-NEXT:    s_endpgm
16536;
16537; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16538; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
16539; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16540; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16541; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16542; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16543; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16544; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16545; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16546; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16547; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16548; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
16549; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16550; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16551; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16552; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
16553; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
16554; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
16555;
16556; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16557; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
16558; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16559; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16560; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16561; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16562; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16563; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16564; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16565; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16566; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16567; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
16568; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16569; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16570; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16571; GFX90A-TGSPLIT-NEXT:    buffer_invl2
16572; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
16573; GFX90A-TGSPLIT-NEXT:    s_endpgm
16574;
16575; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16576; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
16577; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16578; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16579; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16580; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16581; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16582; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16583; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16584; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16585; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16586; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
16587; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16588; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
16589; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16590; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
16591; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
16592;
16593; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16594; GFX940-TGSPLIT:       ; %bb.0: ; %entry
16595; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16596; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16597; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16598; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16599; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16600; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16601; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16602; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16603; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16604; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
16605; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16606; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
16607; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16608; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
16609; GFX940-TGSPLIT-NEXT:    s_endpgm
16610;
16611; GFX11-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16612; GFX11-WGP:       ; %bb.0: ; %entry
16613; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
16614; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16615; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16616; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16617; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16618; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
16619; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
16620; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16621; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
16622; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
16623; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16624; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16625; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16626; GFX11-WGP-NEXT:    buffer_gl1_inv
16627; GFX11-WGP-NEXT:    buffer_gl0_inv
16628; GFX11-WGP-NEXT:    s_endpgm
16629;
16630; GFX11-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16631; GFX11-CU:       ; %bb.0: ; %entry
16632; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
16633; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16634; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16635; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16636; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
16637; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
16638; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
16639; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16640; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
16641; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
16642; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16643; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16644; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16645; GFX11-CU-NEXT:    buffer_gl1_inv
16646; GFX11-CU-NEXT:    buffer_gl0_inv
16647; GFX11-CU-NEXT:    s_endpgm
16648;
16649; GFX12-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16650; GFX12-WGP:       ; %bb.0: ; %entry
16651; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
16652; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16653; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16654; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16655; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
16656; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
16657; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
16658; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16659; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
16660; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
16661; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
16662; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
16663; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
16664; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16665; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
16666; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16667; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
16668; GFX12-WGP-NEXT:    s_endpgm
16669;
16670; GFX12-CU-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
16671; GFX12-CU:       ; %bb.0: ; %entry
16672; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
16673; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16674; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16675; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16676; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
16677; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
16678; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
16679; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16680; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
16681; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
16682; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
16683; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
16684; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
16685; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16686; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
16687; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16688; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
16689; GFX12-CU-NEXT:    s_endpgm
16690    ptr addrspace(1) %out, i32 %in, i32 %old) {
16691entry:
16692  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
16693  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
16694  ret void
16695}
16696
; Kernel under test: a volatile cmpxchg on global memory (addrspace(1)) using
; syncscope("one-as") with seq_cst success and seq_cst failure orderings. The
; cmpxchg result is never read (non-returning variant).
;
; The CHECK lines embedded in the signature below are the expected llc output,
; one group per FileCheck prefix from the RUN lines at the top of the file.
; They are autogenerated by utils/update_llc_test_checks.py; regenerate them
; with that script instead of editing them by hand.
;
; Worth noticing when reading the expectations: every prefix except
; SKIP-CACHE-INV has a cache-invalidate instruction after the atomic
; (buffer_wbinvl1 / buffer_wbinvl1_vol / buffer_gl*_inv / buffer_invl2 /
; buffer_inv / global_inv); the SKIP-CACHE-INV sequence ends right after the
; s_waitcnt (that RUN line passes -amdgcn-skip-cache-invalidations).
16697define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg(
16698; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16699; GFX6:       ; %bb.0: ; %entry
16700; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
16701; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
16702; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
16703; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
16704; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16705; GFX6-NEXT:    s_mov_b32 s12, s5
16706; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
16707; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
16708; GFX6-NEXT:    s_mov_b32 s11, -1
16709; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
16710; GFX6-NEXT:    s_mov_b32 s5, s12
16711; GFX6-NEXT:    s_mov_b32 s6, s11
16712; GFX6-NEXT:    s_mov_b32 s7, s10
16713; GFX6-NEXT:    v_mov_b32_e32 v0, s9
16714; GFX6-NEXT:    v_mov_b32_e32 v2, s8
16715; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16716; GFX6-NEXT:    v_mov_b32_e32 v1, v2
16717; GFX6-NEXT:    s_waitcnt vmcnt(0)
16718; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16
16719; GFX6-NEXT:    s_waitcnt vmcnt(0)
16720; GFX6-NEXT:    buffer_wbinvl1
16721; GFX6-NEXT:    s_endpgm
16722;
16723; GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16724; GFX7:       ; %bb.0: ; %entry
16725; GFX7-NEXT:    s_mov_b64 s[4:5], s[8:9]
16726; GFX7-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
16727; GFX7-NEXT:    s_load_dword s7, s[4:5], 0x2
16728; GFX7-NEXT:    s_load_dword s6, s[4:5], 0x3
16729; GFX7-NEXT:    s_mov_b64 s[10:11], 16
16730; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
16731; GFX7-NEXT:    s_mov_b32 s4, s8
16732; GFX7-NEXT:    s_mov_b32 s5, s9
16733; GFX7-NEXT:    s_mov_b32 s9, s10
16734; GFX7-NEXT:    s_mov_b32 s8, s11
16735; GFX7-NEXT:    s_add_u32 s4, s4, s9
16736; GFX7-NEXT:    s_addc_u32 s8, s5, s8
16737; GFX7-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5
16738; GFX7-NEXT:    s_mov_b32 s5, s8
16739; GFX7-NEXT:    v_mov_b32_e32 v2, s7
16740; GFX7-NEXT:    v_mov_b32_e32 v0, s6
16741; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16742; GFX7-NEXT:    v_mov_b32_e32 v3, v0
16743; GFX7-NEXT:    v_mov_b32_e32 v0, s4
16744; GFX7-NEXT:    v_mov_b32_e32 v1, s5
16745; GFX7-NEXT:    s_waitcnt vmcnt(0)
16746; GFX7-NEXT:    flat_atomic_cmpswap v[0:1], v[2:3]
16747; GFX7-NEXT:    s_waitcnt vmcnt(0)
16748; GFX7-NEXT:    buffer_wbinvl1_vol
16749; GFX7-NEXT:    s_endpgm
16750;
16751; GFX10-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16752; GFX10-WGP:       ; %bb.0: ; %entry
16753; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
16754; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16755; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
16756; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
16757; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16758; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
16759; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
16760; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16761; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
16762; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
16763; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16764; GFX10-WGP-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16765; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16766; GFX10-WGP-NEXT:    buffer_gl1_inv
16767; GFX10-WGP-NEXT:    buffer_gl0_inv
16768; GFX10-WGP-NEXT:    s_endpgm
16769;
16770; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16771; GFX10-CU:       ; %bb.0: ; %entry
16772; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
16773; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16774; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
16775; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
16776; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
16777; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
16778; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
16779; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16780; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
16781; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
16782; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16783; GFX10-CU-NEXT:    global_atomic_cmpswap v0, v[1:2], s[4:5] offset:16
16784; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16785; GFX10-CU-NEXT:    buffer_gl1_inv
16786; GFX10-CU-NEXT:    buffer_gl0_inv
16787; GFX10-CU-NEXT:    s_endpgm
16788;
16789; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16790; SKIP-CACHE-INV:       ; %bb.0: ; %entry
16791; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
16792; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
16793; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
16794; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
16795; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
16796; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
16797; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
16798; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
16799; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
16800; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
16801; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
16802; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
16803; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
16804; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
16805; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
16806; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16807; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
16808; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16809; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16
16810; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
16811; SKIP-CACHE-INV-NEXT:    s_endpgm
16812;
16813; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16814; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
16815; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16816; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16817; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16818; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16819; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16820; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16821; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16822; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16823; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16824; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
16825; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16826; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16827; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16828; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
16829; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
16830; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
16831;
16832; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16833; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
16834; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16835; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
16836; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
16837; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
16838; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16839; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
16840; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
16841; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16842; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16843; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
16844; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16845; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[4:5] offset:16
16846; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16847; GFX90A-TGSPLIT-NEXT:    buffer_invl2
16848; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
16849; GFX90A-TGSPLIT-NEXT:    s_endpgm
16850;
16851; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16852; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
16853; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16854; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16855; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16856; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16857; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16858; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16859; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16860; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16861; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16862; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
16863; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16864; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
16865; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16866; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
16867; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
16868;
16869; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16870; GFX940-TGSPLIT:       ; %bb.0: ; %entry
16871; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
16872; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
16873; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
16874; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
16875; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
16876; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
16877; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
16878; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
16879; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
16880; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
16881; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16882; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v0, v[2:3], s[0:1] offset:16 sc1
16883; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
16884; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
16885; GFX940-TGSPLIT-NEXT:    s_endpgm
16886;
16887; GFX11-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16888; GFX11-WGP:       ; %bb.0: ; %entry
16889; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
16890; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16891; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16892; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16893; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
16894; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
16895; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
16896; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16897; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
16898; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
16899; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16900; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16901; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
16902; GFX11-WGP-NEXT:    buffer_gl1_inv
16903; GFX11-WGP-NEXT:    buffer_gl0_inv
16904; GFX11-WGP-NEXT:    s_endpgm
16905;
16906; GFX11-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16907; GFX11-CU:       ; %bb.0: ; %entry
16908; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
16909; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16910; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16911; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16912; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
16913; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
16914; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
16915; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16916; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
16917; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
16918; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16919; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16
16920; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
16921; GFX11-CU-NEXT:    buffer_gl1_inv
16922; GFX11-CU-NEXT:    buffer_gl0_inv
16923; GFX11-CU-NEXT:    s_endpgm
16924;
16925; GFX12-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16926; GFX12-WGP:       ; %bb.0: ; %entry
16927; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
16928; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16929; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
16930; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
16931; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
16932; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
16933; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
16934; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16935; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
16936; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
16937; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
16938; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
16939; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
16940; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16941; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
16942; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
16943; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
16944; GFX12-WGP-NEXT:    s_endpgm
16945;
16946; GFX12-CU-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
16947; GFX12-CU:       ; %bb.0: ; %entry
16948; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
16949; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
16950; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
16951; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
16952; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
16953; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
16954; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
16955; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
16956; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
16957; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
16958; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
16959; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
16960; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
16961; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16962; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v0, v[1:2], s[0:1] offset:16 scope:SCOPE_SYS
16963; GFX12-CU-NEXT:    s_wait_storecnt 0x0
16964; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
16965; GFX12-CU-NEXT:    s_endpgm
16966    ptr addrspace(1) %out, i32 %in, i32 %old) {
16967entry:
  ; %gep addresses the i32 at index 4 of %out. This is the 16-byte offset seen
  ; in the expectations above (offset:16 on the atomic; the GFX7 sequence adds
  ; 16 to the pointer with s_add_u32/s_addc_u32 instead).
16968  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory at %gep with %old and, on a match, write %in.
  ; %val is never read in this kernel.
16969  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
16970  ret void
16971}
16972
; Kernel under test: a volatile cmpxchg on global memory (addrspace(1)) using
; syncscope("one-as") with monotonic success and monotonic failure orderings.
; This is the returning ("ret") variant: the i32 member of the cmpxchg result
; is stored to %out.
;
; The CHECK lines embedded in the signature below are the expected llc output,
; one group per FileCheck prefix from the RUN lines at the top of the file.
; They are autogenerated by utils/update_llc_test_checks.py; regenerate them
; with that script instead of editing them by hand.
;
; Worth noticing when reading the expectations: none of the sequences contains
; a cache write-back or invalidate instruction. Each one waits
; (s_waitcnt vmcnt(0) or s_wait_loadcnt 0x0) between the atomic and the store
; of its returned value.
16973define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg(
16974; GFX6-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
16975; GFX6:       ; %bb.0: ; %entry
16976; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
16977; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
16978; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
16979; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
16980; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
16981; GFX6-NEXT:    s_mov_b32 s12, s5
16982; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
16983; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
16984; GFX6-NEXT:    s_mov_b32 s11, -1
16985; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
16986; GFX6-NEXT:    s_mov_b32 s5, s12
16987; GFX6-NEXT:    s_mov_b32 s6, s11
16988; GFX6-NEXT:    s_mov_b32 s7, s10
16989; GFX6-NEXT:    v_mov_b32_e32 v0, s9
16990; GFX6-NEXT:    v_mov_b32_e32 v2, s8
16991; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
16992; GFX6-NEXT:    v_mov_b32_e32 v1, v2
16993; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
16994; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
16995; GFX6-NEXT:    s_waitcnt vmcnt(0)
16996; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
16997; GFX6-NEXT:    s_endpgm
16998;
16999; GFX7-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17000; GFX7:       ; %bb.0: ; %entry
17001; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
17002; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17003; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
17004; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
17005; GFX7-NEXT:    s_mov_b64 s[12:13], 16
17006; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
17007; GFX7-NEXT:    s_mov_b32 s6, s4
17008; GFX7-NEXT:    s_mov_b32 s7, s5
17009; GFX7-NEXT:    s_mov_b32 s11, s12
17010; GFX7-NEXT:    s_mov_b32 s10, s13
17011; GFX7-NEXT:    s_add_u32 s6, s6, s11
17012; GFX7-NEXT:    s_addc_u32 s10, s7, s10
17013; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
17014; GFX7-NEXT:    s_mov_b32 s7, s10
17015; GFX7-NEXT:    v_mov_b32_e32 v2, s9
17016; GFX7-NEXT:    v_mov_b32_e32 v0, s8
17017; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17018; GFX7-NEXT:    v_mov_b32_e32 v3, v0
17019; GFX7-NEXT:    v_mov_b32_e32 v0, s6
17020; GFX7-NEXT:    v_mov_b32_e32 v1, s7
17021; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
17022; GFX7-NEXT:    v_mov_b32_e32 v0, s4
17023; GFX7-NEXT:    v_mov_b32_e32 v1, s5
17024; GFX7-NEXT:    s_waitcnt vmcnt(0)
17025; GFX7-NEXT:    flat_store_dword v[0:1], v2
17026; GFX7-NEXT:    s_endpgm
17027;
17028; GFX10-WGP-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17029; GFX10-WGP:       ; %bb.0: ; %entry
17030; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
17031; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17032; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
17033; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
17034; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17035; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
17036; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
17037; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17038; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
17039; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
17040; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17041; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
17042; GFX10-WGP-NEXT:    s_endpgm
17043;
17044; GFX10-CU-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17045; GFX10-CU:       ; %bb.0: ; %entry
17046; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
17047; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17048; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
17049; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
17050; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
17051; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
17052; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
17053; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17054; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
17055; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
17056; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17057; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
17058; GFX10-CU-NEXT:    s_endpgm
17059;
17060; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17061; SKIP-CACHE-INV:       ; %bb.0: ; %entry
17062; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
17063; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
17064; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
17065; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
17066; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
17067; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
17068; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
17069; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
17070; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
17071; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
17072; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
17073; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
17074; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
17075; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
17076; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
17077; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17078; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
17079; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
17080; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
17081; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17082; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
17083; SKIP-CACHE-INV-NEXT:    s_endpgm
17084;
17085; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17086; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
17087; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17088; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17089; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17090; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17091; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17092; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17093; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17094; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17095; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17096; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
17097; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17098; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
17099; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
17100;
17101; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17102; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
17103; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17104; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17105; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17106; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17107; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17108; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17109; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17110; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17111; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17112; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
17113; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17114; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
17115; GFX90A-TGSPLIT-NEXT:    s_endpgm
17116;
17117; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17118; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
17119; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17120; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17121; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17122; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17123; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17124; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17125; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17126; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17127; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17128; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
17129; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17130; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
17131; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
17132;
17133; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17134; GFX940-TGSPLIT:       ; %bb.0: ; %entry
17135; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17136; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17137; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17138; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17139; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17140; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17141; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17142; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17143; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17144; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
17145; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17146; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
17147; GFX940-TGSPLIT-NEXT:    s_endpgm
17148;
17149; GFX11-WGP-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17150; GFX11-WGP:       ; %bb.0: ; %entry
17151; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
17152; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17153; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17154; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17155; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17156; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
17157; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
17158; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17159; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
17160; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17161; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17162; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
17163; GFX11-WGP-NEXT:    s_endpgm
17164;
17165; GFX11-CU-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17166; GFX11-CU:       ; %bb.0: ; %entry
17167; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
17168; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17169; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17170; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17171; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
17172; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
17173; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
17174; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17175; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
17176; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17177; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
17178; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
17179; GFX11-CU-NEXT:    s_endpgm
17180;
17181; GFX12-WGP-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17182; GFX12-WGP:       ; %bb.0: ; %entry
17183; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
17184; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17185; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17186; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17187; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
17188; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
17189; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
17190; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17191; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
17192; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
17193; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
17194; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
17195; GFX12-WGP-NEXT:    s_endpgm
17196;
17197; GFX12-CU-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
17198; GFX12-CU:       ; %bb.0: ; %entry
17199; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
17200; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17201; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17202; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17203; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
17204; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
17205; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
17206; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17207; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
17208; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
17209; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
17210; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
17211; GFX12-CU-NEXT:    s_endpgm
17212    ptr addrspace(1) %out, i32 %in, i32 %old) {
17213entry:
  ; %gep addresses the i32 at index 4 of %out. This is the 16-byte offset seen
  ; in the expectations above (offset:16 on the atomic; the GFX7 sequence adds
  ; 16 to the pointer with s_add_u32/s_addc_u32 instead).
17214  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory at %gep with %old and, on a match, write %in.
17215  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
  ; Take the i32 member (index 0) of the { i32, i1 } result; the i1 member is
  ; not used.
17216  %val0 = extractvalue { i32, i1 } %val, 0
  ; Store that value to the base of %out (not to %gep), which keeps the
  ; atomic's returned value live.
17217  store i32 %val0, ptr addrspace(1) %out, align 4
17218  ret void
17219}
17220
; Returning cmpxchg on a global (addrspace(1)) pointer using
; syncscope("one-as") with acquire (success) / monotonic (failure) ordering.
; The per-target expectations inside this function are autogenerated (see the
; note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In those expectations every run except the one that passes
; -amdgcn-skip-cache-invalidations follows the atomic with a wait and then an
; invalidate instruction (buffer_wbinvl1*, buffer_gl1_inv + buffer_gl0_inv,
; buffer_invl2, buffer_inv or global_inv); that one run has only the waits.
17221define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
17222; GFX6-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17223; GFX6:       ; %bb.0: ; %entry
17224; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
17225; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17226; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
17227; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
17228; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
17229; GFX6-NEXT:    s_mov_b32 s12, s5
17230; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
17231; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
17232; GFX6-NEXT:    s_mov_b32 s11, -1
17233; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
17234; GFX6-NEXT:    s_mov_b32 s5, s12
17235; GFX6-NEXT:    s_mov_b32 s6, s11
17236; GFX6-NEXT:    s_mov_b32 s7, s10
17237; GFX6-NEXT:    v_mov_b32_e32 v0, s9
17238; GFX6-NEXT:    v_mov_b32_e32 v2, s8
17239; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17240; GFX6-NEXT:    v_mov_b32_e32 v1, v2
17241; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
17242; GFX6-NEXT:    s_waitcnt vmcnt(0)
17243; GFX6-NEXT:    buffer_wbinvl1
17244; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
17245; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
17246; GFX6-NEXT:    s_endpgm
17247;
17248; GFX7-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17249; GFX7:       ; %bb.0: ; %entry
17250; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
17251; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17252; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
17253; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
17254; GFX7-NEXT:    s_mov_b64 s[12:13], 16
17255; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
17256; GFX7-NEXT:    s_mov_b32 s6, s4
17257; GFX7-NEXT:    s_mov_b32 s7, s5
17258; GFX7-NEXT:    s_mov_b32 s11, s12
17259; GFX7-NEXT:    s_mov_b32 s10, s13
17260; GFX7-NEXT:    s_add_u32 s6, s6, s11
17261; GFX7-NEXT:    s_addc_u32 s10, s7, s10
17262; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
17263; GFX7-NEXT:    s_mov_b32 s7, s10
17264; GFX7-NEXT:    v_mov_b32_e32 v2, s9
17265; GFX7-NEXT:    v_mov_b32_e32 v0, s8
17266; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17267; GFX7-NEXT:    v_mov_b32_e32 v3, v0
17268; GFX7-NEXT:    v_mov_b32_e32 v0, s6
17269; GFX7-NEXT:    v_mov_b32_e32 v1, s7
17270; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
17271; GFX7-NEXT:    s_waitcnt vmcnt(0)
17272; GFX7-NEXT:    buffer_wbinvl1_vol
17273; GFX7-NEXT:    v_mov_b32_e32 v0, s4
17274; GFX7-NEXT:    v_mov_b32_e32 v1, s5
17275; GFX7-NEXT:    flat_store_dword v[0:1], v2
17276; GFX7-NEXT:    s_endpgm
17277;
17278; GFX10-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17279; GFX10-WGP:       ; %bb.0: ; %entry
17280; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
17281; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17282; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
17283; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
17284; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17285; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
17286; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
17287; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17288; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
17289; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
17290; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17291; GFX10-WGP-NEXT:    buffer_gl1_inv
17292; GFX10-WGP-NEXT:    buffer_gl0_inv
17293; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
17294; GFX10-WGP-NEXT:    s_endpgm
17295;
17296; GFX10-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17297; GFX10-CU:       ; %bb.0: ; %entry
17298; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
17299; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17300; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
17301; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
17302; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
17303; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
17304; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
17305; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17306; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
17307; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
17308; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17309; GFX10-CU-NEXT:    buffer_gl1_inv
17310; GFX10-CU-NEXT:    buffer_gl0_inv
17311; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
17312; GFX10-CU-NEXT:    s_endpgm
17313;
17314; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17315; SKIP-CACHE-INV:       ; %bb.0: ; %entry
17316; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
17317; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
17318; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
17319; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
17320; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
17321; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
17322; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
17323; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
17324; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
17325; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
17326; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
17327; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
17328; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
17329; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
17330; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
17331; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17332; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
17333; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
17334; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17335; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
17336; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17337; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
17338; SKIP-CACHE-INV-NEXT:    s_endpgm
17339;
17340; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17341; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
17342; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17343; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17344; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17345; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17346; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17347; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17348; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17349; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17350; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17351; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
17352; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17353; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
17354; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
17355; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
17356; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
17357;
17358; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17359; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
17360; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17361; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17362; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17363; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17364; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17365; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17366; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17367; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17368; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17369; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
17370; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17371; GFX90A-TGSPLIT-NEXT:    buffer_invl2
17372; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
17373; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
17374; GFX90A-TGSPLIT-NEXT:    s_endpgm
17375;
17376; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17377; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
17378; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17379; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17380; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17381; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17382; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17383; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17384; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17385; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17386; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17387; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
17388; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17389; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
17390; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
17391; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
17392;
17393; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17394; GFX940-TGSPLIT:       ; %bb.0: ; %entry
17395; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17396; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17397; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17398; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17399; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17400; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17401; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17402; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17403; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17404; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
17405; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17406; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
17407; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
17408; GFX940-TGSPLIT-NEXT:    s_endpgm
17409;
17410; GFX11-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17411; GFX11-WGP:       ; %bb.0: ; %entry
17412; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
17413; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17414; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17415; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17416; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17417; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
17418; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
17419; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17420; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
17421; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17422; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17423; GFX11-WGP-NEXT:    buffer_gl1_inv
17424; GFX11-WGP-NEXT:    buffer_gl0_inv
17425; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
17426; GFX11-WGP-NEXT:    s_endpgm
17427;
17428; GFX11-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17429; GFX11-CU:       ; %bb.0: ; %entry
17430; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
17431; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17432; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17433; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17434; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
17435; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
17436; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
17437; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17438; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
17439; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17440; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
17441; GFX11-CU-NEXT:    buffer_gl1_inv
17442; GFX11-CU-NEXT:    buffer_gl0_inv
17443; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
17444; GFX11-CU-NEXT:    s_endpgm
17445;
17446; GFX12-WGP-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17447; GFX12-WGP:       ; %bb.0: ; %entry
17448; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
17449; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17450; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17451; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17452; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
17453; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
17454; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
17455; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17456; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
17457; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
17458; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
17459; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
17460; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
17461; GFX12-WGP-NEXT:    s_endpgm
17462;
17463; GFX12-CU-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
17464; GFX12-CU:       ; %bb.0: ; %entry
17465; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
17466; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17467; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17468; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17469; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
17470; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
17471; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
17472; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17473; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
17474; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
17475; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
17476; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
17477; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
17478; GFX12-CU-NEXT:    s_endpgm
17479    ptr addrspace(1) %out, i32 %in, i32 %old) {
17480entry:
  ; i32 index 4 from %out; this is the byte offset 16 that shows up in the
  ; expected atomic instructions.
17481  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and swap in %in; the result is a { i32, i1 } pair.
17482  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
  ; Keep only field 0 of the pair and store it to %out so the returning form of
  ; the atomic is used.
17483  %val0 = extractvalue { i32, i1 } %val, 0
17484  store i32 %val0, ptr addrspace(1) %out, align 4
17485  ret void
17486}
17487
; Returning cmpxchg on a global (addrspace(1)) pointer using
; syncscope("one-as") with release (success) / monotonic (failure) ordering.
; The per-target expectations inside this function are autogenerated (see the
; note on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In those expectations a wait is placed before the atomic (preceded by
; buffer_wbl2 on the gfx90a/gfx940 runs and by global_wb on the gfx1200 runs),
; and no invalidate instruction follows the atomic.
17488define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg(
17489; GFX6-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17490; GFX6:       ; %bb.0: ; %entry
17491; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
17492; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17493; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
17494; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
17495; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
17496; GFX6-NEXT:    s_mov_b32 s12, s5
17497; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
17498; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
17499; GFX6-NEXT:    s_mov_b32 s11, -1
17500; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
17501; GFX6-NEXT:    s_mov_b32 s5, s12
17502; GFX6-NEXT:    s_mov_b32 s6, s11
17503; GFX6-NEXT:    s_mov_b32 s7, s10
17504; GFX6-NEXT:    v_mov_b32_e32 v0, s9
17505; GFX6-NEXT:    v_mov_b32_e32 v2, s8
17506; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17507; GFX6-NEXT:    v_mov_b32_e32 v1, v2
17508; GFX6-NEXT:    s_waitcnt vmcnt(0)
17509; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
17510; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
17511; GFX6-NEXT:    s_waitcnt vmcnt(0)
17512; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
17513; GFX6-NEXT:    s_endpgm
17514;
17515; GFX7-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17516; GFX7:       ; %bb.0: ; %entry
17517; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
17518; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17519; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
17520; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
17521; GFX7-NEXT:    s_mov_b64 s[12:13], 16
17522; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
17523; GFX7-NEXT:    s_mov_b32 s6, s4
17524; GFX7-NEXT:    s_mov_b32 s7, s5
17525; GFX7-NEXT:    s_mov_b32 s11, s12
17526; GFX7-NEXT:    s_mov_b32 s10, s13
17527; GFX7-NEXT:    s_add_u32 s6, s6, s11
17528; GFX7-NEXT:    s_addc_u32 s10, s7, s10
17529; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
17530; GFX7-NEXT:    s_mov_b32 s7, s10
17531; GFX7-NEXT:    v_mov_b32_e32 v2, s9
17532; GFX7-NEXT:    v_mov_b32_e32 v0, s8
17533; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17534; GFX7-NEXT:    v_mov_b32_e32 v3, v0
17535; GFX7-NEXT:    v_mov_b32_e32 v0, s6
17536; GFX7-NEXT:    v_mov_b32_e32 v1, s7
17537; GFX7-NEXT:    s_waitcnt vmcnt(0)
17538; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
17539; GFX7-NEXT:    v_mov_b32_e32 v0, s4
17540; GFX7-NEXT:    v_mov_b32_e32 v1, s5
17541; GFX7-NEXT:    s_waitcnt vmcnt(0)
17542; GFX7-NEXT:    flat_store_dword v[0:1], v2
17543; GFX7-NEXT:    s_endpgm
17544;
17545; GFX10-WGP-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17546; GFX10-WGP:       ; %bb.0: ; %entry
17547; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
17548; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17549; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
17550; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
17551; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17552; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
17553; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
17554; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17555; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
17556; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17557; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17558; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
17559; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17560; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
17561; GFX10-WGP-NEXT:    s_endpgm
17562;
17563; GFX10-CU-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17564; GFX10-CU:       ; %bb.0: ; %entry
17565; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
17566; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17567; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
17568; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
17569; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
17570; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
17571; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
17572; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17573; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
17574; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17575; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17576; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
17577; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17578; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
17579; GFX10-CU-NEXT:    s_endpgm
17580;
17581; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17582; SKIP-CACHE-INV:       ; %bb.0: ; %entry
17583; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
17584; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
17585; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
17586; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
17587; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
17588; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
17589; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
17590; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
17591; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
17592; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
17593; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
17594; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
17595; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
17596; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
17597; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
17598; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17599; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
17600; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17601; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
17602; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
17603; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17604; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
17605; SKIP-CACHE-INV-NEXT:    s_endpgm
17606;
17607; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17608; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
17609; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17610; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17611; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17612; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17613; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17614; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17615; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17616; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17617; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17618; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
17619; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17620; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
17621; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17622; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
17623; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
17624;
17625; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17626; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
17627; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17628; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17629; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17630; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17631; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17632; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17633; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17634; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17635; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17636; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
17637; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17638; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
17639; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17640; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
17641; GFX90A-TGSPLIT-NEXT:    s_endpgm
17642;
17643; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17644; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
17645; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17646; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17647; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17648; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17649; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17650; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17651; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17652; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17653; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17654; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
17655; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17656; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
17657; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17658; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
17659; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
17660;
17661; GFX940-TGSPLIT-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17662; GFX940-TGSPLIT:       ; %bb.0: ; %entry
17663; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17664; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17665; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17666; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17667; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17668; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17669; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17670; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17671; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17672; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
17673; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17674; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
17675; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17676; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
17677; GFX940-TGSPLIT-NEXT:    s_endpgm
17678;
17679; GFX11-WGP-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17680; GFX11-WGP:       ; %bb.0: ; %entry
17681; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
17682; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17683; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17684; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17685; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17686; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
17687; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
17688; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17689; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
17690; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17691; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17692; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17693; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17694; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
17695; GFX11-WGP-NEXT:    s_endpgm
17696;
17697; GFX11-CU-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17698; GFX11-CU:       ; %bb.0: ; %entry
17699; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
17700; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17701; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17702; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17703; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
17704; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
17705; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
17706; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17707; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
17708; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
17709; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17710; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17711; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
17712; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
17713; GFX11-CU-NEXT:    s_endpgm
17714;
17715; GFX12-WGP-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17716; GFX12-WGP:       ; %bb.0: ; %entry
17717; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
17718; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17719; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17720; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17721; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
17722; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
17723; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
17724; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17725; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
17726; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
17727; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
17728; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
17729; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
17730; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
17731; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
17732; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
17733; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
17734; GFX12-WGP-NEXT:    s_endpgm
17735;
17736; GFX12-CU-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
17737; GFX12-CU:       ; %bb.0: ; %entry
17738; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
17739; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17740; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17741; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17742; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
17743; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
17744; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
17745; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17746; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
17747; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
17748; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
17749; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
17750; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
17751; GFX12-CU-NEXT:    s_wait_storecnt 0x0
17752; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
17753; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
17754; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
17755; GFX12-CU-NEXT:    s_endpgm
17756    ptr addrspace(1) %out, i32 %in, i32 %old) {
17757entry:
  ; i32 index 4 from %out; this is the byte offset 16 that shows up in the
  ; expected atomic instructions.
17758  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare against %old and swap in %in; the result is a { i32, i1 } pair.
17759  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release monotonic
  ; Keep only field 0 of the pair and store it to %out so the returning form of
  ; the atomic is used.
17760  %val0 = extractvalue { i32, i1 } %val, 0
17761  store i32 %val0, ptr addrspace(1) %out, align 4
17762  ret void
17763}
17764
17765define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg(
17766; GFX6-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17767; GFX6:       ; %bb.0: ; %entry
17768; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
17769; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17770; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
17771; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
17772; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
17773; GFX6-NEXT:    s_mov_b32 s12, s5
17774; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
17775; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
17776; GFX6-NEXT:    s_mov_b32 s11, -1
17777; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
17778; GFX6-NEXT:    s_mov_b32 s5, s12
17779; GFX6-NEXT:    s_mov_b32 s6, s11
17780; GFX6-NEXT:    s_mov_b32 s7, s10
17781; GFX6-NEXT:    v_mov_b32_e32 v0, s9
17782; GFX6-NEXT:    v_mov_b32_e32 v2, s8
17783; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17784; GFX6-NEXT:    v_mov_b32_e32 v1, v2
17785; GFX6-NEXT:    s_waitcnt vmcnt(0)
17786; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
17787; GFX6-NEXT:    s_waitcnt vmcnt(0)
17788; GFX6-NEXT:    buffer_wbinvl1
17789; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
17790; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
17791; GFX6-NEXT:    s_endpgm
17792;
17793; GFX7-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17794; GFX7:       ; %bb.0: ; %entry
17795; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
17796; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
17797; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
17798; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
17799; GFX7-NEXT:    s_mov_b64 s[12:13], 16
17800; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
17801; GFX7-NEXT:    s_mov_b32 s6, s4
17802; GFX7-NEXT:    s_mov_b32 s7, s5
17803; GFX7-NEXT:    s_mov_b32 s11, s12
17804; GFX7-NEXT:    s_mov_b32 s10, s13
17805; GFX7-NEXT:    s_add_u32 s6, s6, s11
17806; GFX7-NEXT:    s_addc_u32 s10, s7, s10
17807; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
17808; GFX7-NEXT:    s_mov_b32 s7, s10
17809; GFX7-NEXT:    v_mov_b32_e32 v2, s9
17810; GFX7-NEXT:    v_mov_b32_e32 v0, s8
17811; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17812; GFX7-NEXT:    v_mov_b32_e32 v3, v0
17813; GFX7-NEXT:    v_mov_b32_e32 v0, s6
17814; GFX7-NEXT:    v_mov_b32_e32 v1, s7
17815; GFX7-NEXT:    s_waitcnt vmcnt(0)
17816; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
17817; GFX7-NEXT:    s_waitcnt vmcnt(0)
17818; GFX7-NEXT:    buffer_wbinvl1_vol
17819; GFX7-NEXT:    v_mov_b32_e32 v0, s4
17820; GFX7-NEXT:    v_mov_b32_e32 v1, s5
17821; GFX7-NEXT:    flat_store_dword v[0:1], v2
17822; GFX7-NEXT:    s_endpgm
17823;
17824; GFX10-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17825; GFX10-WGP:       ; %bb.0: ; %entry
17826; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
17827; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17828; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
17829; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
17830; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17831; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
17832; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
17833; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17834; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
17835; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17836; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17837; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
17838; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
17839; GFX10-WGP-NEXT:    buffer_gl1_inv
17840; GFX10-WGP-NEXT:    buffer_gl0_inv
17841; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
17842; GFX10-WGP-NEXT:    s_endpgm
17843;
17844; GFX10-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17845; GFX10-CU:       ; %bb.0: ; %entry
17846; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
17847; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17848; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
17849; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
17850; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
17851; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
17852; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
17853; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17854; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
17855; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17856; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
17857; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
17858; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
17859; GFX10-CU-NEXT:    buffer_gl1_inv
17860; GFX10-CU-NEXT:    buffer_gl0_inv
17861; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
17862; GFX10-CU-NEXT:    s_endpgm
17863;
17864; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17865; SKIP-CACHE-INV:       ; %bb.0: ; %entry
17866; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
17867; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
17868; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
17869; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
17870; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
17871; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
17872; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
17873; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
17874; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
17875; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
17876; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
17877; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
17878; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
17879; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
17880; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
17881; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
17882; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
17883; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17884; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
17885; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17886; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
17887; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
17888; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
17889; SKIP-CACHE-INV-NEXT:    s_endpgm
17890;
17891; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17892; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
17893; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17894; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17895; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17896; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17897; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17898; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17899; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17900; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17901; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17902; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
17903; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17904; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
17905; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17906; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
17907; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
17908; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
17909; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
17910;
17911; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17912; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
17913; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17914; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
17915; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
17916; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
17917; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17918; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
17919; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
17920; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17921; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17922; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
17923; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17924; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
17925; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17926; GFX90A-TGSPLIT-NEXT:    buffer_invl2
17927; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
17928; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
17929; GFX90A-TGSPLIT-NEXT:    s_endpgm
17930;
17931; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17932; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
17933; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17934; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17935; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17936; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17937; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17938; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17939; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17940; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17941; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17942; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
17943; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17944; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
17945; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17946; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
17947; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
17948; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
17949;
17950; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17951; GFX940-TGSPLIT:       ; %bb.0: ; %entry
17952; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
17953; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
17954; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
17955; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
17956; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
17957; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
17958; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
17959; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
17960; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
17961; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
17962; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17963; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
17964; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
17965; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
17966; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
17967; GFX940-TGSPLIT-NEXT:    s_endpgm
17968;
17969; GFX11-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17970; GFX11-WGP:       ; %bb.0: ; %entry
17971; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
17972; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17973; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
17974; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
17975; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
17976; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
17977; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
17978; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17979; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
17980; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17981; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
17982; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
17983; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
17984; GFX11-WGP-NEXT:    buffer_gl1_inv
17985; GFX11-WGP-NEXT:    buffer_gl0_inv
17986; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
17987; GFX11-WGP-NEXT:    s_endpgm
17988;
17989; GFX11-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
17990; GFX11-CU:       ; %bb.0: ; %entry
17991; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
17992; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
17993; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
17994; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
17995; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
17996; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
17997; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
17998; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
17999; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
18000; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18001; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18002; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18003; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18004; GFX11-CU-NEXT:    buffer_gl1_inv
18005; GFX11-CU-NEXT:    buffer_gl0_inv
18006; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18007; GFX11-CU-NEXT:    s_endpgm
18008;
18009; GFX12-WGP-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
18010; GFX12-WGP:       ; %bb.0: ; %entry
18011; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
18012; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18013; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18014; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18015; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
18016; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
18017; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
18018; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18019; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
18020; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
18021; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
18022; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
18023; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18024; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
18025; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
18026; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
18027; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
18028; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18029; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
18030; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18031; GFX12-WGP-NEXT:    s_endpgm
18032;
18033; GFX12-CU-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
18034; GFX12-CU:       ; %bb.0: ; %entry
18035; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
18036; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18037; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18038; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18039; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
18040; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
18041; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
18042; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18043; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
18044; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
18045; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
18046; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
18047; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18048; GFX12-CU-NEXT:    s_wait_storecnt 0x0
18049; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
18050; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
18051; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
18052; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18053; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
18054; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18055; GFX12-CU-NEXT:    s_endpgm
18056    ptr addrspace(1) %out, i32 %in, i32 %old) {
18057entry:
18058  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
18059  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
18060  %val0 = extractvalue { i32, i1 } %val, 0
18061  store i32 %val0, ptr addrspace(1) %out, align 4
18062  ret void
18063}
18064
; Returning cmpxchg on a global (addrspace(1)) pointer with syncscope("one-as"),
; success ordering seq_cst and failure ordering monotonic.
; The kernel compares the i32 located 4 elements (byte offset 16) past %out
; against %old, swaps in %in on a match, and stores field 0 of the cmpxchg
; result back to %out.
; The per-target assertion lines below are autogenerated (see the note on the
; first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
18065define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg(
18066; GFX6-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18067; GFX6:       ; %bb.0: ; %entry
18068; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
18069; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18070; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
18071; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
18072; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
18073; GFX6-NEXT:    s_mov_b32 s12, s5
18074; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
18075; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
18076; GFX6-NEXT:    s_mov_b32 s11, -1
18077; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
18078; GFX6-NEXT:    s_mov_b32 s5, s12
18079; GFX6-NEXT:    s_mov_b32 s6, s11
18080; GFX6-NEXT:    s_mov_b32 s7, s10
18081; GFX6-NEXT:    v_mov_b32_e32 v0, s9
18082; GFX6-NEXT:    v_mov_b32_e32 v2, s8
18083; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18084; GFX6-NEXT:    v_mov_b32_e32 v1, v2
18085; GFX6-NEXT:    s_waitcnt vmcnt(0)
18086; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18087; GFX6-NEXT:    s_waitcnt vmcnt(0)
18088; GFX6-NEXT:    buffer_wbinvl1
18089; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18090; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
18091; GFX6-NEXT:    s_endpgm
18092;
18093; GFX7-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18094; GFX7:       ; %bb.0: ; %entry
18095; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
18096; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18097; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
18098; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
18099; GFX7-NEXT:    s_mov_b64 s[12:13], 16
18100; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
18101; GFX7-NEXT:    s_mov_b32 s6, s4
18102; GFX7-NEXT:    s_mov_b32 s7, s5
18103; GFX7-NEXT:    s_mov_b32 s11, s12
18104; GFX7-NEXT:    s_mov_b32 s10, s13
18105; GFX7-NEXT:    s_add_u32 s6, s6, s11
18106; GFX7-NEXT:    s_addc_u32 s10, s7, s10
18107; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
18108; GFX7-NEXT:    s_mov_b32 s7, s10
18109; GFX7-NEXT:    v_mov_b32_e32 v2, s9
18110; GFX7-NEXT:    v_mov_b32_e32 v0, s8
18111; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18112; GFX7-NEXT:    v_mov_b32_e32 v3, v0
18113; GFX7-NEXT:    v_mov_b32_e32 v0, s6
18114; GFX7-NEXT:    v_mov_b32_e32 v1, s7
18115; GFX7-NEXT:    s_waitcnt vmcnt(0)
18116; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18117; GFX7-NEXT:    s_waitcnt vmcnt(0)
18118; GFX7-NEXT:    buffer_wbinvl1_vol
18119; GFX7-NEXT:    v_mov_b32_e32 v0, s4
18120; GFX7-NEXT:    v_mov_b32_e32 v1, s5
18121; GFX7-NEXT:    flat_store_dword v[0:1], v2
18122; GFX7-NEXT:    s_endpgm
18123;
18124; GFX10-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18125; GFX10-WGP:       ; %bb.0: ; %entry
18126; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
18127; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18128; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
18129; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
18130; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18131; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
18132; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
18133; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18134; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
18135; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18136; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
18137; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18138; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18139; GFX10-WGP-NEXT:    buffer_gl1_inv
18140; GFX10-WGP-NEXT:    buffer_gl0_inv
18141; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
18142; GFX10-WGP-NEXT:    s_endpgm
18143;
18144; GFX10-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18145; GFX10-CU:       ; %bb.0: ; %entry
18146; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
18147; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18148; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
18149; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
18150; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
18151; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
18152; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
18153; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18154; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
18155; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18156; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18157; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18158; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18159; GFX10-CU-NEXT:    buffer_gl1_inv
18160; GFX10-CU-NEXT:    buffer_gl0_inv
18161; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
18162; GFX10-CU-NEXT:    s_endpgm
18163;
18164; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18165; SKIP-CACHE-INV:       ; %bb.0: ; %entry
18166; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
18167; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
18168; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
18169; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
18170; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
18171; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
18172; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
18173; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
18174; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
18175; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
18176; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
18177; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
18178; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
18179; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
18180; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
18181; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18182; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
18183; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18184; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18185; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18186; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18187; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18188; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
18189; SKIP-CACHE-INV-NEXT:    s_endpgm
18190;
18191; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18192; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
18193; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18194; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18195; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18196; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18197; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18198; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18199; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18200; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18201; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18202; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
18203; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18204; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18205; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18206; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
18207; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
18208; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18209; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
18210;
18211; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18212; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
18213; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18214; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18215; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18216; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18217; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18218; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18219; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18220; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18221; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18222; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
18223; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18224; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18225; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18226; GFX90A-TGSPLIT-NEXT:    buffer_invl2
18227; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
18228; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18229; GFX90A-TGSPLIT-NEXT:    s_endpgm
18230;
18231; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18232; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
18233; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18234; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18235; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18236; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18237; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18238; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18239; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18240; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18241; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18242; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
18243; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18244; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
18245; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18246; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
18247; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18248; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
18249;
18250; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18251; GFX940-TGSPLIT:       ; %bb.0: ; %entry
18252; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18253; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18254; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18255; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18256; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18257; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18258; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18259; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18260; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18261; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
18262; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18263; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
18264; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18265; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
18266; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18267; GFX940-TGSPLIT-NEXT:    s_endpgm
18268;
18269; GFX11-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18270; GFX11-WGP:       ; %bb.0: ; %entry
18271; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
18272; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18273; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18274; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18275; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18276; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
18277; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
18278; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18279; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
18280; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
18281; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
18282; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18283; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
18284; GFX11-WGP-NEXT:    buffer_gl1_inv
18285; GFX11-WGP-NEXT:    buffer_gl0_inv
18286; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18287; GFX11-WGP-NEXT:    s_endpgm
18288;
18289; GFX11-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18290; GFX11-CU:       ; %bb.0: ; %entry
18291; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
18292; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18293; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18294; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18295; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
18296; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
18297; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
18298; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18299; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
18300; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18301; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18302; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18303; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18304; GFX11-CU-NEXT:    buffer_gl1_inv
18305; GFX11-CU-NEXT:    buffer_gl0_inv
18306; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18307; GFX11-CU-NEXT:    s_endpgm
18308;
18309; GFX12-WGP-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18310; GFX12-WGP:       ; %bb.0: ; %entry
18311; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
18312; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18313; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18314; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18315; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
18316; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
18317; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
18318; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18319; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
18320; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
18321; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
18322; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
18323; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18324; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
18325; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
18326; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
18327; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
18328; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18329; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
18330; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18331; GFX12-WGP-NEXT:    s_endpgm
18332;
18333; GFX12-CU-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
18334; GFX12-CU:       ; %bb.0: ; %entry
18335; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
18336; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18337; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18338; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18339; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
18340; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
18341; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
18342; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18343; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
18344; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
18345; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
18346; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
18347; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18348; GFX12-CU-NEXT:    s_wait_storecnt 0x0
18349; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
18350; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
18351; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
18352; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18353; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
18354; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18355; GFX12-CU-NEXT:    s_endpgm
18356    ptr addrspace(1) %out, i32 %in, i32 %old) {
18357entry:
  ; Index 4 of an i32 array is byte offset 16, the offset that appears in the
  ; expected instructions above.
18358  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
18359  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
  ; Only field 0 (the i32) of the { i32, i1 } result is used; the i1 in field 1
  ; is ignored by this test.
18360  %val0 = extractvalue { i32, i1 } %val, 0
18361  store i32 %val0, ptr addrspace(1) %out, align 4
18362  ret void
18363}
18364
18365define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg(
18366; GFX6-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18367; GFX6:       ; %bb.0: ; %entry
18368; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
18369; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18370; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
18371; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
18372; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
18373; GFX6-NEXT:    s_mov_b32 s12, s5
18374; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
18375; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
18376; GFX6-NEXT:    s_mov_b32 s11, -1
18377; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
18378; GFX6-NEXT:    s_mov_b32 s5, s12
18379; GFX6-NEXT:    s_mov_b32 s6, s11
18380; GFX6-NEXT:    s_mov_b32 s7, s10
18381; GFX6-NEXT:    v_mov_b32_e32 v0, s9
18382; GFX6-NEXT:    v_mov_b32_e32 v2, s8
18383; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18384; GFX6-NEXT:    v_mov_b32_e32 v1, v2
18385; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18386; GFX6-NEXT:    s_waitcnt vmcnt(0)
18387; GFX6-NEXT:    buffer_wbinvl1
18388; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18389; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
18390; GFX6-NEXT:    s_endpgm
18391;
18392; GFX7-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18393; GFX7:       ; %bb.0: ; %entry
18394; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
18395; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18396; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
18397; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
18398; GFX7-NEXT:    s_mov_b64 s[12:13], 16
18399; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
18400; GFX7-NEXT:    s_mov_b32 s6, s4
18401; GFX7-NEXT:    s_mov_b32 s7, s5
18402; GFX7-NEXT:    s_mov_b32 s11, s12
18403; GFX7-NEXT:    s_mov_b32 s10, s13
18404; GFX7-NEXT:    s_add_u32 s6, s6, s11
18405; GFX7-NEXT:    s_addc_u32 s10, s7, s10
18406; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
18407; GFX7-NEXT:    s_mov_b32 s7, s10
18408; GFX7-NEXT:    v_mov_b32_e32 v2, s9
18409; GFX7-NEXT:    v_mov_b32_e32 v0, s8
18410; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18411; GFX7-NEXT:    v_mov_b32_e32 v3, v0
18412; GFX7-NEXT:    v_mov_b32_e32 v0, s6
18413; GFX7-NEXT:    v_mov_b32_e32 v1, s7
18414; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18415; GFX7-NEXT:    s_waitcnt vmcnt(0)
18416; GFX7-NEXT:    buffer_wbinvl1_vol
18417; GFX7-NEXT:    v_mov_b32_e32 v0, s4
18418; GFX7-NEXT:    v_mov_b32_e32 v1, s5
18419; GFX7-NEXT:    flat_store_dword v[0:1], v2
18420; GFX7-NEXT:    s_endpgm
18421;
18422; GFX10-WGP-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18423; GFX10-WGP:       ; %bb.0: ; %entry
18424; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
18425; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18426; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
18427; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
18428; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18429; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
18430; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
18431; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18432; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
18433; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18434; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18435; GFX10-WGP-NEXT:    buffer_gl1_inv
18436; GFX10-WGP-NEXT:    buffer_gl0_inv
18437; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
18438; GFX10-WGP-NEXT:    s_endpgm
18439;
18440; GFX10-CU-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18441; GFX10-CU:       ; %bb.0: ; %entry
18442; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
18443; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18444; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
18445; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
18446; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
18447; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
18448; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
18449; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18450; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
18451; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18452; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18453; GFX10-CU-NEXT:    buffer_gl1_inv
18454; GFX10-CU-NEXT:    buffer_gl0_inv
18455; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
18456; GFX10-CU-NEXT:    s_endpgm
18457;
18458; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18459; SKIP-CACHE-INV:       ; %bb.0: ; %entry
18460; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
18461; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
18462; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
18463; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
18464; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
18465; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
18466; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
18467; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
18468; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
18469; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
18470; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
18471; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
18472; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
18473; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
18474; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
18475; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18476; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
18477; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18478; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18479; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18480; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18481; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
18482; SKIP-CACHE-INV-NEXT:    s_endpgm
18483;
18484; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18485; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
18486; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18487; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18488; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18489; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18490; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18491; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18492; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18493; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18494; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18495; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18496; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18497; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
18498; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
18499; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18500; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
18501;
18502; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18503; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
18504; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18505; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18506; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18507; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18508; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18509; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18510; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18511; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18512; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18513; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18514; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18515; GFX90A-TGSPLIT-NEXT:    buffer_invl2
18516; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
18517; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18518; GFX90A-TGSPLIT-NEXT:    s_endpgm
18519;
18520; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18521; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
18522; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18523; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18524; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18525; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18526; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18527; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18528; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18529; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18530; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18531; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
18532; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18533; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
18534; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18535; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
18536;
18537; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18538; GFX940-TGSPLIT:       ; %bb.0: ; %entry
18539; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18540; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18541; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18542; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18543; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18544; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18545; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18546; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18547; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18548; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
18549; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18550; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
18551; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18552; GFX940-TGSPLIT-NEXT:    s_endpgm
18553;
18554; GFX11-WGP-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18555; GFX11-WGP:       ; %bb.0: ; %entry
18556; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
18557; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18558; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18559; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18560; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18561; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
18562; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
18563; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18564; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
18565; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18566; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
18567; GFX11-WGP-NEXT:    buffer_gl1_inv
18568; GFX11-WGP-NEXT:    buffer_gl0_inv
18569; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18570; GFX11-WGP-NEXT:    s_endpgm
18571;
18572; GFX11-CU-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18573; GFX11-CU:       ; %bb.0: ; %entry
18574; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
18575; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18576; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18577; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18578; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
18579; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
18580; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
18581; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18582; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
18583; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18584; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18585; GFX11-CU-NEXT:    buffer_gl1_inv
18586; GFX11-CU-NEXT:    buffer_gl0_inv
18587; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18588; GFX11-CU-NEXT:    s_endpgm
18589;
18590; GFX12-WGP-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18591; GFX12-WGP:       ; %bb.0: ; %entry
18592; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
18593; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18594; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18595; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18596; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
18597; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
18598; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
18599; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18600; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
18601; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
18602; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
18603; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
18604; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18605; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
18606; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18607; GFX12-WGP-NEXT:    s_endpgm
18608;
18609; GFX12-CU-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
18610; GFX12-CU:       ; %bb.0: ; %entry
18611; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
18612; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18613; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18614; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18615; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
18616; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
18617; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
18618; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18619; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
18620; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
18621; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
18622; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
18623; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18624; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
18625; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18626; GFX12-CU-NEXT:    s_endpgm
18627    ptr addrspace(1) %out, i32 %in, i32 %old) {
18628entry:
18629  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
18630  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire
18631  %val0 = extractvalue { i32, i1 } %val, 0
18632  store i32 %val0, ptr addrspace(1) %out, align 4
18633  ret void
18634}
18635
; Kernel under test: a volatile cmpxchg on global memory (addrspace(1)) using
; syncscope("one-as") with success ordering acquire and failure ordering
; acquire. The value loaded by the cmpxchg is stored back through %out.
; The per-target assertions interleaved with the signature are autogenerated
; (see the NOTE on the first line of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
18636define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg(
18637; GFX6-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18638; GFX6:       ; %bb.0: ; %entry
18639; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
18640; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18641; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
18642; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
18643; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
18644; GFX6-NEXT:    s_mov_b32 s12, s5
18645; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
18646; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
18647; GFX6-NEXT:    s_mov_b32 s11, -1
18648; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
18649; GFX6-NEXT:    s_mov_b32 s5, s12
18650; GFX6-NEXT:    s_mov_b32 s6, s11
18651; GFX6-NEXT:    s_mov_b32 s7, s10
18652; GFX6-NEXT:    v_mov_b32_e32 v0, s9
18653; GFX6-NEXT:    v_mov_b32_e32 v2, s8
18654; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18655; GFX6-NEXT:    v_mov_b32_e32 v1, v2
18656; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18657; GFX6-NEXT:    s_waitcnt vmcnt(0)
18658; GFX6-NEXT:    buffer_wbinvl1
18659; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18660; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
18661; GFX6-NEXT:    s_endpgm
18662;
18663; GFX7-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18664; GFX7:       ; %bb.0: ; %entry
18665; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
18666; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18667; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
18668; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
18669; GFX7-NEXT:    s_mov_b64 s[12:13], 16
18670; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
18671; GFX7-NEXT:    s_mov_b32 s6, s4
18672; GFX7-NEXT:    s_mov_b32 s7, s5
18673; GFX7-NEXT:    s_mov_b32 s11, s12
18674; GFX7-NEXT:    s_mov_b32 s10, s13
18675; GFX7-NEXT:    s_add_u32 s6, s6, s11
18676; GFX7-NEXT:    s_addc_u32 s10, s7, s10
18677; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
18678; GFX7-NEXT:    s_mov_b32 s7, s10
18679; GFX7-NEXT:    v_mov_b32_e32 v2, s9
18680; GFX7-NEXT:    v_mov_b32_e32 v0, s8
18681; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18682; GFX7-NEXT:    v_mov_b32_e32 v3, v0
18683; GFX7-NEXT:    v_mov_b32_e32 v0, s6
18684; GFX7-NEXT:    v_mov_b32_e32 v1, s7
18685; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18686; GFX7-NEXT:    s_waitcnt vmcnt(0)
18687; GFX7-NEXT:    buffer_wbinvl1_vol
18688; GFX7-NEXT:    v_mov_b32_e32 v0, s4
18689; GFX7-NEXT:    v_mov_b32_e32 v1, s5
18690; GFX7-NEXT:    flat_store_dword v[0:1], v2
18691; GFX7-NEXT:    s_endpgm
18692;
18693; GFX10-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18694; GFX10-WGP:       ; %bb.0: ; %entry
18695; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
18696; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18697; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
18698; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
18699; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18700; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
18701; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
18702; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18703; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
18704; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18705; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18706; GFX10-WGP-NEXT:    buffer_gl1_inv
18707; GFX10-WGP-NEXT:    buffer_gl0_inv
18708; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
18709; GFX10-WGP-NEXT:    s_endpgm
18710;
18711; GFX10-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18712; GFX10-CU:       ; %bb.0: ; %entry
18713; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
18714; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18715; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
18716; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
18717; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
18718; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
18719; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
18720; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18721; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
18722; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18723; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18724; GFX10-CU-NEXT:    buffer_gl1_inv
18725; GFX10-CU-NEXT:    buffer_gl0_inv
18726; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
18727; GFX10-CU-NEXT:    s_endpgm
18728;
18729; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18730; SKIP-CACHE-INV:       ; %bb.0: ; %entry
18731; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
18732; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
18733; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
18734; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
18735; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
18736; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
18737; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
18738; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
18739; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
18740; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
18741; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
18742; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
18743; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
18744; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
18745; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
18746; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18747; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
18748; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
18749; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18750; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18751; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
18752; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
18753; SKIP-CACHE-INV-NEXT:    s_endpgm
18754;
18755; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18756; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
18757; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18758; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18759; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18760; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18761; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18762; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18763; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18764; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18765; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18766; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18767; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18768; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
18769; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
18770; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18771; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
18772;
18773; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18774; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
18775; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18776; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18777; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
18778; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
18779; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18780; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
18781; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
18782; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18783; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18784; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
18785; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18786; GFX90A-TGSPLIT-NEXT:    buffer_invl2
18787; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
18788; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
18789; GFX90A-TGSPLIT-NEXT:    s_endpgm
18790;
18791; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18792; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
18793; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18794; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18795; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18796; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18797; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18798; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18799; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18800; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18801; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18802; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
18803; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18804; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
18805; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18806; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
18807;
18808; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18809; GFX940-TGSPLIT:       ; %bb.0: ; %entry
18810; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
18811; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
18812; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
18813; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
18814; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
18815; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
18816; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
18817; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18818; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
18819; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
18820; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
18821; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
18822; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
18823; GFX940-TGSPLIT-NEXT:    s_endpgm
18824;
18825; GFX11-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18826; GFX11-WGP:       ; %bb.0: ; %entry
18827; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
18828; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18829; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18830; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18831; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18832; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
18833; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
18834; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18835; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
18836; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18837; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
18838; GFX11-WGP-NEXT:    buffer_gl1_inv
18839; GFX11-WGP-NEXT:    buffer_gl0_inv
18840; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18841; GFX11-WGP-NEXT:    s_endpgm
18842;
18843; GFX11-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18844; GFX11-CU:       ; %bb.0: ; %entry
18845; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
18846; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18847; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18848; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18849; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
18850; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
18851; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
18852; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18853; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
18854; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
18855; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
18856; GFX11-CU-NEXT:    buffer_gl1_inv
18857; GFX11-CU-NEXT:    buffer_gl0_inv
18858; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18859; GFX11-CU-NEXT:    s_endpgm
18860;
18861; GFX12-WGP-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18862; GFX12-WGP:       ; %bb.0: ; %entry
18863; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
18864; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18865; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
18866; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
18867; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
18868; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
18869; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
18870; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18871; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
18872; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
18873; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
18874; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
18875; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
18876; GFX12-WGP-NEXT:    s_endpgm
18877;
18878; GFX12-CU-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
18879; GFX12-CU:       ; %bb.0: ; %entry
18880; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
18881; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
18882; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
18883; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
18884; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
18885; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
18886; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
18887; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18888; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
18889; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
18890; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
18891; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
18892; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
18893; GFX12-CU-NEXT:    s_endpgm
18894    ptr addrspace(1) %out, i32 %in, i32 %old) {
18895entry:
  ; The cmpxchg targets the i32 element at index 4 from %out, i.e. a byte
  ; offset of 16 from the base pointer.
18896  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
18897  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
  ; Field 0 of the { i32, i1 } result is used so the cmpxchg result is live;
  ; it is written to %out itself (offset 0), not to %gep.
18898  %val0 = extractvalue { i32, i1 } %val, 0
18899  store i32 %val0, ptr addrspace(1) %out, align 4
18900  ret void
18901}
18902
18903define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
18904; GFX6-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
18905; GFX6:       ; %bb.0: ; %entry
18906; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
18907; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18908; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
18909; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
18910; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
18911; GFX6-NEXT:    s_mov_b32 s12, s5
18912; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
18913; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
18914; GFX6-NEXT:    s_mov_b32 s11, -1
18915; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
18916; GFX6-NEXT:    s_mov_b32 s5, s12
18917; GFX6-NEXT:    s_mov_b32 s6, s11
18918; GFX6-NEXT:    s_mov_b32 s7, s10
18919; GFX6-NEXT:    v_mov_b32_e32 v0, s9
18920; GFX6-NEXT:    v_mov_b32_e32 v2, s8
18921; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
18922; GFX6-NEXT:    v_mov_b32_e32 v1, v2
18923; GFX6-NEXT:    s_waitcnt vmcnt(0)
18924; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
18925; GFX6-NEXT:    s_waitcnt vmcnt(0)
18926; GFX6-NEXT:    buffer_wbinvl1
18927; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
18928; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
18929; GFX6-NEXT:    s_endpgm
18930;
18931; GFX7-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
18932; GFX7:       ; %bb.0: ; %entry
18933; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
18934; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
18935; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
18936; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
18937; GFX7-NEXT:    s_mov_b64 s[12:13], 16
18938; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
18939; GFX7-NEXT:    s_mov_b32 s6, s4
18940; GFX7-NEXT:    s_mov_b32 s7, s5
18941; GFX7-NEXT:    s_mov_b32 s11, s12
18942; GFX7-NEXT:    s_mov_b32 s10, s13
18943; GFX7-NEXT:    s_add_u32 s6, s6, s11
18944; GFX7-NEXT:    s_addc_u32 s10, s7, s10
18945; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
18946; GFX7-NEXT:    s_mov_b32 s7, s10
18947; GFX7-NEXT:    v_mov_b32_e32 v2, s9
18948; GFX7-NEXT:    v_mov_b32_e32 v0, s8
18949; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
18950; GFX7-NEXT:    v_mov_b32_e32 v3, v0
18951; GFX7-NEXT:    v_mov_b32_e32 v0, s6
18952; GFX7-NEXT:    v_mov_b32_e32 v1, s7
18953; GFX7-NEXT:    s_waitcnt vmcnt(0)
18954; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
18955; GFX7-NEXT:    s_waitcnt vmcnt(0)
18956; GFX7-NEXT:    buffer_wbinvl1_vol
18957; GFX7-NEXT:    v_mov_b32_e32 v0, s4
18958; GFX7-NEXT:    v_mov_b32_e32 v1, s5
18959; GFX7-NEXT:    flat_store_dword v[0:1], v2
18960; GFX7-NEXT:    s_endpgm
18961;
18962; GFX10-WGP-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
18963; GFX10-WGP:       ; %bb.0: ; %entry
18964; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
18965; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18966; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
18967; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
18968; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
18969; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
18970; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
18971; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18972; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
18973; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18974; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
18975; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18976; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
18977; GFX10-WGP-NEXT:    buffer_gl1_inv
18978; GFX10-WGP-NEXT:    buffer_gl0_inv
18979; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
18980; GFX10-WGP-NEXT:    s_endpgm
18981;
18982; GFX10-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
18983; GFX10-CU:       ; %bb.0: ; %entry
18984; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
18985; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
18986; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
18987; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
18988; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
18989; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
18990; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
18991; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
18992; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
18993; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18994; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
18995; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
18996; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
18997; GFX10-CU-NEXT:    buffer_gl1_inv
18998; GFX10-CU-NEXT:    buffer_gl0_inv
18999; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
19000; GFX10-CU-NEXT:    s_endpgm
19001;
19002; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19003; SKIP-CACHE-INV:       ; %bb.0: ; %entry
19004; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
19005; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
19006; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
19007; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
19008; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
19009; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
19010; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
19011; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
19012; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
19013; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
19014; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
19015; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
19016; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
19017; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
19018; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
19019; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19020; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
19021; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19022; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
19023; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19024; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19025; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19026; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
19027; SKIP-CACHE-INV-NEXT:    s_endpgm
19028;
19029; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19030; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
19031; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19032; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19033; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19034; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19035; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19036; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19037; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19038; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19039; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19040; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
19041; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19042; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19043; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19044; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
19045; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
19046; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19047; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
19048;
19049; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19050; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
19051; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19052; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19053; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19054; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19055; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19056; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19057; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19058; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19059; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19060; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
19061; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19062; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19063; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19064; GFX90A-TGSPLIT-NEXT:    buffer_invl2
19065; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
19066; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19067; GFX90A-TGSPLIT-NEXT:    s_endpgm
19068;
19069; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19070; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
19071; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19072; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19073; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19074; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19075; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19076; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19077; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19078; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19079; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19080; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
19081; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19082; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
19083; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19084; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
19085; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19086; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
19087;
19088; GFX940-TGSPLIT-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19089; GFX940-TGSPLIT:       ; %bb.0: ; %entry
19090; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19091; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19092; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19093; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19094; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19095; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19096; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19097; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19098; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19099; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
19100; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19101; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
19102; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19103; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
19104; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19105; GFX940-TGSPLIT-NEXT:    s_endpgm
19106;
19107; GFX11-WGP-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19108; GFX11-WGP:       ; %bb.0: ; %entry
19109; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
19110; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19111; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19112; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19113; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19114; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
19115; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
19116; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19117; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
19118; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19119; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19120; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19121; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19122; GFX11-WGP-NEXT:    buffer_gl1_inv
19123; GFX11-WGP-NEXT:    buffer_gl0_inv
19124; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19125; GFX11-WGP-NEXT:    s_endpgm
19126;
19127; GFX11-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19128; GFX11-CU:       ; %bb.0: ; %entry
19129; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
19130; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19131; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19132; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19133; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
19134; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
19135; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
19136; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19137; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
19138; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19139; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19140; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19141; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19142; GFX11-CU-NEXT:    buffer_gl1_inv
19143; GFX11-CU-NEXT:    buffer_gl0_inv
19144; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19145; GFX11-CU-NEXT:    s_endpgm
19146;
19147; GFX12-WGP-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19148; GFX12-WGP:       ; %bb.0: ; %entry
19149; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
19150; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19151; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19152; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19153; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
19154; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
19155; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
19156; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19157; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
19158; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
19159; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19160; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19161; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19162; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
19163; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
19164; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19165; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19166; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19167; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
19168; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19169; GFX12-WGP-NEXT:    s_endpgm
19170;
19171; GFX12-CU-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
19172; GFX12-CU:       ; %bb.0: ; %entry
19173; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
19174; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19175; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19176; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19177; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
19178; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
19179; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
19180; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19181; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
19182; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
19183; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19184; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19185; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19186; GFX12-CU-NEXT:    s_wait_storecnt 0x0
19187; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
19188; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19189; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19190; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19191; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
19192; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19193; GFX12-CU-NEXT:    s_endpgm
19194    ptr addrspace(1) %out, i32 %in, i32 %old) {
19195entry:
19196  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
19197  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release acquire
19198  %val0 = extractvalue { i32, i1 } %val, 0
19199  store i32 %val0, ptr addrspace(1) %out, align 4
19200  ret void
19201}
19202
; Value-returning cmpxchg on a global (addrspace(1)) pointer using
; syncscope("one-as"), with success ordering acq_rel and failure ordering
; acquire.
;
; The exchange targets i32 element 4 of %out (the expected code below addresses
; it with a 16-byte offset); the value returned by the cmpxchg is then written
; to %out itself with a plain store.
;
; The per-target check lines between the define line and the parameter list
; are autogenerated (see the NOTE at the top of this file). Regenerate them
; with utils/update_llc_test_checks.py rather than editing them by hand.
19203define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
19204; GFX6-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19205; GFX6:       ; %bb.0: ; %entry
19206; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
19207; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19208; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
19209; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
19210; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
19211; GFX6-NEXT:    s_mov_b32 s12, s5
19212; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
19213; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
19214; GFX6-NEXT:    s_mov_b32 s11, -1
19215; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
19216; GFX6-NEXT:    s_mov_b32 s5, s12
19217; GFX6-NEXT:    s_mov_b32 s6, s11
19218; GFX6-NEXT:    s_mov_b32 s7, s10
19219; GFX6-NEXT:    v_mov_b32_e32 v0, s9
19220; GFX6-NEXT:    v_mov_b32_e32 v2, s8
19221; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19222; GFX6-NEXT:    v_mov_b32_e32 v1, v2
19223; GFX6-NEXT:    s_waitcnt vmcnt(0)
19224; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
19225; GFX6-NEXT:    s_waitcnt vmcnt(0)
19226; GFX6-NEXT:    buffer_wbinvl1
19227; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19228; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
19229; GFX6-NEXT:    s_endpgm
19230;
19231; GFX7-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19232; GFX7:       ; %bb.0: ; %entry
19233; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
19234; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19235; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
19236; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
19237; GFX7-NEXT:    s_mov_b64 s[12:13], 16
19238; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
19239; GFX7-NEXT:    s_mov_b32 s6, s4
19240; GFX7-NEXT:    s_mov_b32 s7, s5
19241; GFX7-NEXT:    s_mov_b32 s11, s12
19242; GFX7-NEXT:    s_mov_b32 s10, s13
19243; GFX7-NEXT:    s_add_u32 s6, s6, s11
19244; GFX7-NEXT:    s_addc_u32 s10, s7, s10
19245; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
19246; GFX7-NEXT:    s_mov_b32 s7, s10
19247; GFX7-NEXT:    v_mov_b32_e32 v2, s9
19248; GFX7-NEXT:    v_mov_b32_e32 v0, s8
19249; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19250; GFX7-NEXT:    v_mov_b32_e32 v3, v0
19251; GFX7-NEXT:    v_mov_b32_e32 v0, s6
19252; GFX7-NEXT:    v_mov_b32_e32 v1, s7
19253; GFX7-NEXT:    s_waitcnt vmcnt(0)
19254; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
19255; GFX7-NEXT:    s_waitcnt vmcnt(0)
19256; GFX7-NEXT:    buffer_wbinvl1_vol
19257; GFX7-NEXT:    v_mov_b32_e32 v0, s4
19258; GFX7-NEXT:    v_mov_b32_e32 v1, s5
19259; GFX7-NEXT:    flat_store_dword v[0:1], v2
19260; GFX7-NEXT:    s_endpgm
19261;
19262; GFX10-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19263; GFX10-WGP:       ; %bb.0: ; %entry
19264; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
19265; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19266; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
19267; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
19268; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19269; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
19270; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
19271; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19272; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
19273; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19274; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19275; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19276; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19277; GFX10-WGP-NEXT:    buffer_gl1_inv
19278; GFX10-WGP-NEXT:    buffer_gl0_inv
19279; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
19280; GFX10-WGP-NEXT:    s_endpgm
19281;
19282; GFX10-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19283; GFX10-CU:       ; %bb.0: ; %entry
19284; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
19285; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19286; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
19287; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
19288; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
19289; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
19290; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
19291; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19292; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
19293; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19294; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19295; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19296; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19297; GFX10-CU-NEXT:    buffer_gl1_inv
19298; GFX10-CU-NEXT:    buffer_gl0_inv
19299; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
19300; GFX10-CU-NEXT:    s_endpgm
19301;
19302; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19303; SKIP-CACHE-INV:       ; %bb.0: ; %entry
19304; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
19305; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
19306; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
19307; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
19308; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
19309; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
19310; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
19311; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
19312; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
19313; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
19314; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
19315; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
19316; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
19317; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
19318; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
19319; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19320; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
19321; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19322; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
19323; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19324; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19325; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19326; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
19327; SKIP-CACHE-INV-NEXT:    s_endpgm
19328;
19329; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19330; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
19331; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19332; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19333; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19334; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19335; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19336; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19337; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19338; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19339; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19340; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
19341; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19342; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19343; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19344; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
19345; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
19346; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19347; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
19348;
19349; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19350; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
19351; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19352; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19353; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19354; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19355; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19356; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19357; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19358; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19359; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19360; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
19361; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19362; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19363; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19364; GFX90A-TGSPLIT-NEXT:    buffer_invl2
19365; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
19366; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19367; GFX90A-TGSPLIT-NEXT:    s_endpgm
19368;
19369; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19370; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
19371; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19372; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19373; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19374; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19375; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19376; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19377; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19378; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19379; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19380; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
19381; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19382; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
19383; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19384; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
19385; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19386; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
19387;
19388; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19389; GFX940-TGSPLIT:       ; %bb.0: ; %entry
19390; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19391; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19392; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19393; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19394; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19395; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19396; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19397; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19398; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19399; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
19400; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19401; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
19402; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19403; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
19404; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19405; GFX940-TGSPLIT-NEXT:    s_endpgm
19406;
19407; GFX11-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19408; GFX11-WGP:       ; %bb.0: ; %entry
19409; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
19410; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19411; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19412; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19413; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19414; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
19415; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
19416; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19417; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
19418; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19419; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19420; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19421; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19422; GFX11-WGP-NEXT:    buffer_gl1_inv
19423; GFX11-WGP-NEXT:    buffer_gl0_inv
19424; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19425; GFX11-WGP-NEXT:    s_endpgm
19426;
19427; GFX11-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19428; GFX11-CU:       ; %bb.0: ; %entry
19429; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
19430; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19431; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19432; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19433; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
19434; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
19435; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
19436; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19437; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
19438; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19439; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19440; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19441; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19442; GFX11-CU-NEXT:    buffer_gl1_inv
19443; GFX11-CU-NEXT:    buffer_gl0_inv
19444; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19445; GFX11-CU-NEXT:    s_endpgm
19446;
19447; GFX12-WGP-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19448; GFX12-WGP:       ; %bb.0: ; %entry
19449; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
19450; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19451; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19452; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19453; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
19454; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
19455; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
19456; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19457; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
19458; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
19459; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19460; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19461; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19462; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
19463; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
19464; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19465; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19466; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19467; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
19468; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19469; GFX12-WGP-NEXT:    s_endpgm
19470;
19471; GFX12-CU-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
19472; GFX12-CU:       ; %bb.0: ; %entry
19473; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
19474; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19475; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19476; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19477; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
19478; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
19479; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
19480; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19481; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
19482; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
19483; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19484; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19485; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19486; GFX12-CU-NEXT:    s_wait_storecnt 0x0
19487; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
19488; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19489; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19490; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19491; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
19492; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19493; GFX12-CU-NEXT:    s_endpgm
19494    ptr addrspace(1) %out, i32 %in, i32 %old) {
19495entry:
  ; Address of i32 element 4 of %out, i.e. 16 bytes past the base pointer.
19496  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Compare the memory at %gep against %old and write %in on a match.
19497  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
  ; Field 0 of the { i32, i1 } result is the value the cmpxchg read from memory;
  ; the i1 success flag (field 1) is not used by this test.
19498  %val0 = extractvalue { i32, i1 } %val, 0
19499  store i32 %val0, ptr addrspace(1) %out, align 4
19500  ret void
19501}
19502
19503define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
19504; GFX6-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19505; GFX6:       ; %bb.0: ; %entry
19506; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
19507; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19508; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
19509; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
19510; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
19511; GFX6-NEXT:    s_mov_b32 s12, s5
19512; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
19513; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
19514; GFX6-NEXT:    s_mov_b32 s11, -1
19515; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
19516; GFX6-NEXT:    s_mov_b32 s5, s12
19517; GFX6-NEXT:    s_mov_b32 s6, s11
19518; GFX6-NEXT:    s_mov_b32 s7, s10
19519; GFX6-NEXT:    v_mov_b32_e32 v0, s9
19520; GFX6-NEXT:    v_mov_b32_e32 v2, s8
19521; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19522; GFX6-NEXT:    v_mov_b32_e32 v1, v2
19523; GFX6-NEXT:    s_waitcnt vmcnt(0)
19524; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
19525; GFX6-NEXT:    s_waitcnt vmcnt(0)
19526; GFX6-NEXT:    buffer_wbinvl1
19527; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19528; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
19529; GFX6-NEXT:    s_endpgm
19530;
19531; GFX7-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19532; GFX7:       ; %bb.0: ; %entry
19533; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
19534; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19535; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
19536; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
19537; GFX7-NEXT:    s_mov_b64 s[12:13], 16
19538; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
19539; GFX7-NEXT:    s_mov_b32 s6, s4
19540; GFX7-NEXT:    s_mov_b32 s7, s5
19541; GFX7-NEXT:    s_mov_b32 s11, s12
19542; GFX7-NEXT:    s_mov_b32 s10, s13
19543; GFX7-NEXT:    s_add_u32 s6, s6, s11
19544; GFX7-NEXT:    s_addc_u32 s10, s7, s10
19545; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
19546; GFX7-NEXT:    s_mov_b32 s7, s10
19547; GFX7-NEXT:    v_mov_b32_e32 v2, s9
19548; GFX7-NEXT:    v_mov_b32_e32 v0, s8
19549; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19550; GFX7-NEXT:    v_mov_b32_e32 v3, v0
19551; GFX7-NEXT:    v_mov_b32_e32 v0, s6
19552; GFX7-NEXT:    v_mov_b32_e32 v1, s7
19553; GFX7-NEXT:    s_waitcnt vmcnt(0)
19554; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
19555; GFX7-NEXT:    s_waitcnt vmcnt(0)
19556; GFX7-NEXT:    buffer_wbinvl1_vol
19557; GFX7-NEXT:    v_mov_b32_e32 v0, s4
19558; GFX7-NEXT:    v_mov_b32_e32 v1, s5
19559; GFX7-NEXT:    flat_store_dword v[0:1], v2
19560; GFX7-NEXT:    s_endpgm
19561;
19562; GFX10-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19563; GFX10-WGP:       ; %bb.0: ; %entry
19564; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
19565; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19566; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
19567; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
19568; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19569; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
19570; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
19571; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19572; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
19573; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19574; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19575; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19576; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19577; GFX10-WGP-NEXT:    buffer_gl1_inv
19578; GFX10-WGP-NEXT:    buffer_gl0_inv
19579; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
19580; GFX10-WGP-NEXT:    s_endpgm
19581;
19582; GFX10-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19583; GFX10-CU:       ; %bb.0: ; %entry
19584; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
19585; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19586; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
19587; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
19588; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
19589; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
19590; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
19591; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19592; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
19593; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19594; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19595; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19596; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19597; GFX10-CU-NEXT:    buffer_gl1_inv
19598; GFX10-CU-NEXT:    buffer_gl0_inv
19599; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
19600; GFX10-CU-NEXT:    s_endpgm
19601;
19602; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19603; SKIP-CACHE-INV:       ; %bb.0: ; %entry
19604; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
19605; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
19606; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
19607; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
19608; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
19609; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
19610; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
19611; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
19612; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
19613; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
19614; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
19615; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
19616; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
19617; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
19618; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
19619; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19620; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
19621; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19622; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
19623; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19624; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19625; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19626; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
19627; SKIP-CACHE-INV-NEXT:    s_endpgm
19628;
19629; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19630; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
19631; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19632; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19633; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19634; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19635; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19636; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19637; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19638; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19639; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19640; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
19641; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19642; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19643; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19644; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
19645; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
19646; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19647; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
19648;
19649; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19650; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
19651; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19652; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19653; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19654; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19655; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19656; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19657; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19658; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19659; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19660; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
19661; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19662; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19663; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19664; GFX90A-TGSPLIT-NEXT:    buffer_invl2
19665; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
19666; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19667; GFX90A-TGSPLIT-NEXT:    s_endpgm
19668;
19669; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19670; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
19671; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19672; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19673; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19674; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19675; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19676; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19677; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19678; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19679; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19680; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
19681; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19682; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
19683; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19684; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
19685; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19686; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
19687;
19688; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19689; GFX940-TGSPLIT:       ; %bb.0: ; %entry
19690; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19691; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19692; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19693; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19694; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19695; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19696; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19697; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19698; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19699; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
19700; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19701; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
19702; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19703; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
19704; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19705; GFX940-TGSPLIT-NEXT:    s_endpgm
19706;
19707; GFX11-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19708; GFX11-WGP:       ; %bb.0: ; %entry
19709; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
19710; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19711; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19712; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19713; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19714; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
19715; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
19716; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19717; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
19718; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19719; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19720; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19721; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
19722; GFX11-WGP-NEXT:    buffer_gl1_inv
19723; GFX11-WGP-NEXT:    buffer_gl0_inv
19724; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19725; GFX11-WGP-NEXT:    s_endpgm
19726;
19727; GFX11-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19728; GFX11-CU:       ; %bb.0: ; %entry
19729; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
19730; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19731; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19732; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19733; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
19734; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
19735; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
19736; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19737; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
19738; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19739; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19740; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
19741; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
19742; GFX11-CU-NEXT:    buffer_gl1_inv
19743; GFX11-CU-NEXT:    buffer_gl0_inv
19744; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19745; GFX11-CU-NEXT:    s_endpgm
19746;
19747; GFX12-WGP-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19748; GFX12-WGP:       ; %bb.0: ; %entry
19749; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
19750; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19751; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
19752; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
19753; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
19754; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
19755; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
19756; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19757; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
19758; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
19759; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19760; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19761; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19762; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
19763; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
19764; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
19765; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
19766; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
19767; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
19768; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
19769; GFX12-WGP-NEXT:    s_endpgm
19770;
19771; GFX12-CU-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
19772; GFX12-CU:       ; %bb.0: ; %entry
19773; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
19774; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
19775; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
19776; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
19777; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
19778; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
19779; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
19780; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19781; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
19782; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
19783; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19784; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19785; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19786; GFX12-CU-NEXT:    s_wait_storecnt 0x0
19787; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
19788; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
19789; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
19790; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
19791; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
19792; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
19793; GFX12-CU-NEXT:    s_endpgm
19794    ptr addrspace(1) %out, i32 %in, i32 %old) {
19795entry:
19796  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
19797  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
19798  %val0 = extractvalue { i32, i1 } %val, 0
19799  store i32 %val0, ptr addrspace(1) %out, align 4
19800  ret void
19801}
19802
; Test: value-returning cmpxchg on global memory (addrspace(1)) at the
; "one-as" sync scope, with monotonic success ordering and seq_cst failure
; ordering. The per-target assertion lines that follow the define are
; autogenerated (see the NOTE on the first line of this file); regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.
19803define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg(
19804; GFX6-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19805; GFX6:       ; %bb.0: ; %entry
19806; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
19807; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19808; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
19809; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
19810; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
19811; GFX6-NEXT:    s_mov_b32 s12, s5
19812; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
19813; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
19814; GFX6-NEXT:    s_mov_b32 s11, -1
19815; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
19816; GFX6-NEXT:    s_mov_b32 s5, s12
19817; GFX6-NEXT:    s_mov_b32 s6, s11
19818; GFX6-NEXT:    s_mov_b32 s7, s10
19819; GFX6-NEXT:    v_mov_b32_e32 v0, s9
19820; GFX6-NEXT:    v_mov_b32_e32 v2, s8
19821; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19822; GFX6-NEXT:    v_mov_b32_e32 v1, v2
19823; GFX6-NEXT:    s_waitcnt vmcnt(0)
19824; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
19825; GFX6-NEXT:    s_waitcnt vmcnt(0)
19826; GFX6-NEXT:    buffer_wbinvl1
19827; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19828; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
19829; GFX6-NEXT:    s_endpgm
19830;
19831; GFX7-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19832; GFX7:       ; %bb.0: ; %entry
19833; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
19834; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
19835; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
19836; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
19837; GFX7-NEXT:    s_mov_b64 s[12:13], 16
19838; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
19839; GFX7-NEXT:    s_mov_b32 s6, s4
19840; GFX7-NEXT:    s_mov_b32 s7, s5
19841; GFX7-NEXT:    s_mov_b32 s11, s12
19842; GFX7-NEXT:    s_mov_b32 s10, s13
19843; GFX7-NEXT:    s_add_u32 s6, s6, s11
19844; GFX7-NEXT:    s_addc_u32 s10, s7, s10
19845; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
19846; GFX7-NEXT:    s_mov_b32 s7, s10
19847; GFX7-NEXT:    v_mov_b32_e32 v2, s9
19848; GFX7-NEXT:    v_mov_b32_e32 v0, s8
19849; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19850; GFX7-NEXT:    v_mov_b32_e32 v3, v0
19851; GFX7-NEXT:    v_mov_b32_e32 v0, s6
19852; GFX7-NEXT:    v_mov_b32_e32 v1, s7
19853; GFX7-NEXT:    s_waitcnt vmcnt(0)
19854; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
19855; GFX7-NEXT:    s_waitcnt vmcnt(0)
19856; GFX7-NEXT:    buffer_wbinvl1_vol
19857; GFX7-NEXT:    v_mov_b32_e32 v0, s4
19858; GFX7-NEXT:    v_mov_b32_e32 v1, s5
19859; GFX7-NEXT:    flat_store_dword v[0:1], v2
19860; GFX7-NEXT:    s_endpgm
19861;
19862; GFX10-WGP-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19863; GFX10-WGP:       ; %bb.0: ; %entry
19864; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
19865; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19866; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
19867; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
19868; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
19869; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
19870; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
19871; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19872; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
19873; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19874; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
19875; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19876; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
19877; GFX10-WGP-NEXT:    buffer_gl1_inv
19878; GFX10-WGP-NEXT:    buffer_gl0_inv
19879; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
19880; GFX10-WGP-NEXT:    s_endpgm
19881;
19882; GFX10-CU-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19883; GFX10-CU:       ; %bb.0: ; %entry
19884; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
19885; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19886; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
19887; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
19888; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
19889; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
19890; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
19891; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
19892; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
19893; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19894; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
19895; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
19896; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
19897; GFX10-CU-NEXT:    buffer_gl1_inv
19898; GFX10-CU-NEXT:    buffer_gl0_inv
19899; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
19900; GFX10-CU-NEXT:    s_endpgm
19901;
19902; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19903; SKIP-CACHE-INV:       ; %bb.0: ; %entry
19904; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
19905; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
19906; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
19907; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
19908; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
19909; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
19910; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
19911; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
19912; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
19913; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
19914; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
19915; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
19916; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
19917; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
19918; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
19919; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
19920; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
19921; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19922; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
19923; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19924; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
19925; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
19926; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
19927; SKIP-CACHE-INV-NEXT:    s_endpgm
19928;
19929; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19930; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
19931; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19932; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19933; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19934; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19935; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19936; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19937; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19938; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19939; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19940; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
19941; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19942; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19943; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19944; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
19945; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
19946; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19947; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
19948;
19949; GFX90A-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19950; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
19951; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19952; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
19953; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
19954; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
19955; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19956; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
19957; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
19958; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19959; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19960; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
19961; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19962; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
19963; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19964; GFX90A-TGSPLIT-NEXT:    buffer_invl2
19965; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
19966; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
19967; GFX90A-TGSPLIT-NEXT:    s_endpgm
19968;
19969; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19970; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
19971; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19972; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19973; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19974; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19975; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19976; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19977; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19978; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19979; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19980; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
19981; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19982; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
19983; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
19984; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
19985; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
19986; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
19987;
19988; GFX940-TGSPLIT-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
19989; GFX940-TGSPLIT:       ; %bb.0: ; %entry
19990; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
19991; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
19992; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
19993; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
19994; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
19995; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
19996; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
19997; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
19998; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
19999; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
20000; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20001; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
20002; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20003; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
20004; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20005; GFX940-TGSPLIT-NEXT:    s_endpgm
20006;
20007; GFX11-WGP-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
20008; GFX11-WGP:       ; %bb.0: ; %entry
20009; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
20010; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20011; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20012; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20013; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20014; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
20015; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
20016; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20017; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
20018; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20019; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20020; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20021; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20022; GFX11-WGP-NEXT:    buffer_gl1_inv
20023; GFX11-WGP-NEXT:    buffer_gl0_inv
20024; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20025; GFX11-WGP-NEXT:    s_endpgm
20026;
20027; GFX11-CU-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
20028; GFX11-CU:       ; %bb.0: ; %entry
20029; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
20030; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20031; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20032; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20033; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
20034; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
20035; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
20036; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20037; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
20038; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20039; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20040; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20041; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20042; GFX11-CU-NEXT:    buffer_gl1_inv
20043; GFX11-CU-NEXT:    buffer_gl0_inv
20044; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20045; GFX11-CU-NEXT:    s_endpgm
20046;
20047; GFX12-WGP-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
20048; GFX12-WGP:       ; %bb.0: ; %entry
20049; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
20050; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20051; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20052; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20053; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
20054; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
20055; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
20056; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20057; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
20058; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
20059; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20060; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20061; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20062; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
20063; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
20064; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20065; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20066; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20067; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
20068; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20069; GFX12-WGP-NEXT:    s_endpgm
20070;
20071; GFX12-CU-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
20072; GFX12-CU:       ; %bb.0: ; %entry
20073; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
20074; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20075; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20076; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20077; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
20078; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
20079; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
20080; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20081; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
20082; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
20083; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20084; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20085; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20086; GFX12-CU-NEXT:    s_wait_storecnt 0x0
20087; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
20088; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20089; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20090; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20091; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
20092; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20093; GFX12-CU-NEXT:    s_endpgm
20094    ptr addrspace(1) %out, i32 %in, i32 %old) {
20095entry:
  ; Element 4 of an i32 array starting at %out, i.e. byte offset 16 (the
  ; 16-byte offset that appears in the expected atomics above).
20096  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Orderings are written as <success> <failure>: monotonic on success,
  ; seq_cst on failure. %old is the compare value, %in the replacement.
20097  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst
  ; Field 0 of the cmpxchg result is the value that was loaded from memory.
  ; Storing it to %out uses the result, so the expected atomics above are the
  ; value-returning forms (e.g. glc / TH_ATOMIC_RETURN).
20098  %val0 = extractvalue { i32, i1 } %val, 0
20099  store i32 %val0, ptr addrspace(1) %out, align 4
20100  ret void
20101}
20102
20103define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg(
20104; GFX6-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20105; GFX6:       ; %bb.0: ; %entry
20106; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
20107; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20108; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
20109; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
20110; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
20111; GFX6-NEXT:    s_mov_b32 s12, s5
20112; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
20113; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
20114; GFX6-NEXT:    s_mov_b32 s11, -1
20115; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
20116; GFX6-NEXT:    s_mov_b32 s5, s12
20117; GFX6-NEXT:    s_mov_b32 s6, s11
20118; GFX6-NEXT:    s_mov_b32 s7, s10
20119; GFX6-NEXT:    v_mov_b32_e32 v0, s9
20120; GFX6-NEXT:    v_mov_b32_e32 v2, s8
20121; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20122; GFX6-NEXT:    v_mov_b32_e32 v1, v2
20123; GFX6-NEXT:    s_waitcnt vmcnt(0)
20124; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
20125; GFX6-NEXT:    s_waitcnt vmcnt(0)
20126; GFX6-NEXT:    buffer_wbinvl1
20127; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20128; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
20129; GFX6-NEXT:    s_endpgm
20130;
20131; GFX7-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20132; GFX7:       ; %bb.0: ; %entry
20133; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
20134; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20135; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
20136; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
20137; GFX7-NEXT:    s_mov_b64 s[12:13], 16
20138; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20139; GFX7-NEXT:    s_mov_b32 s6, s4
20140; GFX7-NEXT:    s_mov_b32 s7, s5
20141; GFX7-NEXT:    s_mov_b32 s11, s12
20142; GFX7-NEXT:    s_mov_b32 s10, s13
20143; GFX7-NEXT:    s_add_u32 s6, s6, s11
20144; GFX7-NEXT:    s_addc_u32 s10, s7, s10
20145; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
20146; GFX7-NEXT:    s_mov_b32 s7, s10
20147; GFX7-NEXT:    v_mov_b32_e32 v2, s9
20148; GFX7-NEXT:    v_mov_b32_e32 v0, s8
20149; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20150; GFX7-NEXT:    v_mov_b32_e32 v3, v0
20151; GFX7-NEXT:    v_mov_b32_e32 v0, s6
20152; GFX7-NEXT:    v_mov_b32_e32 v1, s7
20153; GFX7-NEXT:    s_waitcnt vmcnt(0)
20154; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
20155; GFX7-NEXT:    s_waitcnt vmcnt(0)
20156; GFX7-NEXT:    buffer_wbinvl1_vol
20157; GFX7-NEXT:    v_mov_b32_e32 v0, s4
20158; GFX7-NEXT:    v_mov_b32_e32 v1, s5
20159; GFX7-NEXT:    flat_store_dword v[0:1], v2
20160; GFX7-NEXT:    s_endpgm
20161;
20162; GFX10-WGP-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20163; GFX10-WGP:       ; %bb.0: ; %entry
20164; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
20165; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20166; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
20167; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
20168; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20169; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
20170; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
20171; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20172; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
20173; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20174; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20175; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20176; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20177; GFX10-WGP-NEXT:    buffer_gl1_inv
20178; GFX10-WGP-NEXT:    buffer_gl0_inv
20179; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
20180; GFX10-WGP-NEXT:    s_endpgm
20181;
20182; GFX10-CU-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20183; GFX10-CU:       ; %bb.0: ; %entry
20184; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
20185; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20186; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
20187; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
20188; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
20189; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
20190; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
20191; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20192; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
20193; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20194; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20195; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20196; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20197; GFX10-CU-NEXT:    buffer_gl1_inv
20198; GFX10-CU-NEXT:    buffer_gl0_inv
20199; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
20200; GFX10-CU-NEXT:    s_endpgm
20201;
20202; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20203; SKIP-CACHE-INV:       ; %bb.0: ; %entry
20204; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
20205; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
20206; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
20207; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
20208; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
20209; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
20210; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
20211; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
20212; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
20213; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
20214; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
20215; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
20216; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
20217; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
20218; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
20219; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20220; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
20221; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20222; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
20223; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20224; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20225; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20226; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
20227; SKIP-CACHE-INV-NEXT:    s_endpgm
20228;
20229; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20230; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
20231; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20232; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20233; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20234; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20235; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20236; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20237; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20238; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20239; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20240; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
20241; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20242; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20243; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20244; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
20245; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
20246; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20247; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
20248;
20249; GFX90A-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20250; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
20251; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20252; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20253; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20254; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20255; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20256; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20257; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20258; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20259; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20260; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
20261; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20262; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20263; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20264; GFX90A-TGSPLIT-NEXT:    buffer_invl2
20265; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
20266; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20267; GFX90A-TGSPLIT-NEXT:    s_endpgm
20268;
20269; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20270; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
20271; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20272; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20273; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20274; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20275; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20276; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20277; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20278; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20279; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20280; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
20281; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20282; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
20283; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20284; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
20285; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20286; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
20287;
20288; GFX940-TGSPLIT-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20289; GFX940-TGSPLIT:       ; %bb.0: ; %entry
20290; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20291; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20292; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20293; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20294; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20295; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20296; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20297; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20298; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20299; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
20300; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20301; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
20302; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20303; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
20304; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20305; GFX940-TGSPLIT-NEXT:    s_endpgm
20306;
20307; GFX11-WGP-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20308; GFX11-WGP:       ; %bb.0: ; %entry
20309; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
20310; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20311; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20312; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20313; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20314; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
20315; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
20316; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20317; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
20318; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20319; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20320; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20321; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20322; GFX11-WGP-NEXT:    buffer_gl1_inv
20323; GFX11-WGP-NEXT:    buffer_gl0_inv
20324; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20325; GFX11-WGP-NEXT:    s_endpgm
20326;
20327; GFX11-CU-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20328; GFX11-CU:       ; %bb.0: ; %entry
20329; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
20330; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20331; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20332; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20333; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
20334; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
20335; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
20336; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20337; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
20338; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20339; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20340; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20341; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20342; GFX11-CU-NEXT:    buffer_gl1_inv
20343; GFX11-CU-NEXT:    buffer_gl0_inv
20344; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20345; GFX11-CU-NEXT:    s_endpgm
20346;
20347; GFX12-WGP-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20348; GFX12-WGP:       ; %bb.0: ; %entry
20349; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
20350; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20351; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20352; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20353; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
20354; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
20355; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
20356; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20357; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
20358; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
20359; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20360; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20361; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20362; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
20363; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
20364; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20365; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
20366; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20367; GFX12-WGP-NEXT:    s_endpgm
20368;
20369; GFX12-CU-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
20370; GFX12-CU:       ; %bb.0: ; %entry
20371; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
20372; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20373; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20374; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20375; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
20376; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
20377; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
20378; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20379; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
20380; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
20381; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20382; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20383; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20384; GFX12-CU-NEXT:    s_wait_storecnt 0x0
20385; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
20386; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20387; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
20388; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20389; GFX12-CU-NEXT:    s_endpgm
20390    ptr addrspace(1) %out, i32 %in, i32 %old) {
20391entry:
20392  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
20393  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst
20394  %val0 = extractvalue { i32, i1 } %val, 0
20395  store i32 %val0, ptr addrspace(1) %out, align 4
20396  ret void
20397}
20398
; Returning cmpxchg on global memory (addrspace 1) with syncscope "one-as",
; success ordering release and failure ordering seq_cst.
;
; Kernel arguments:
;   %out - global pointer; the cmpxchg targets element 4 of it (i32 units,
;          which shows up as "offset:16" in the checked instructions) and
;          the value returned by the cmpxchg is stored back to %out itself.
;   %in  - the replacement value handed to the cmpxchg.
;   %old - the comparison value handed to the cmpxchg.
;
; The per-target assertion groups below are produced by
; utils/update_llc_test_checks.py from the RUN lines at the top of the file;
; regenerate them with that script instead of editing them by hand.
20399define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg(
20400; GFX6-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20401; GFX6:       ; %bb.0: ; %entry
20402; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
20403; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20404; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
20405; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
20406; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
20407; GFX6-NEXT:    s_mov_b32 s12, s5
20408; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
20409; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
20410; GFX6-NEXT:    s_mov_b32 s11, -1
20411; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
20412; GFX6-NEXT:    s_mov_b32 s5, s12
20413; GFX6-NEXT:    s_mov_b32 s6, s11
20414; GFX6-NEXT:    s_mov_b32 s7, s10
20415; GFX6-NEXT:    v_mov_b32_e32 v0, s9
20416; GFX6-NEXT:    v_mov_b32_e32 v2, s8
20417; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20418; GFX6-NEXT:    v_mov_b32_e32 v1, v2
20419; GFX6-NEXT:    s_waitcnt vmcnt(0)
20420; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
20421; GFX6-NEXT:    s_waitcnt vmcnt(0)
20422; GFX6-NEXT:    buffer_wbinvl1
20423; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20424; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
20425; GFX6-NEXT:    s_endpgm
20426;
20427; GFX7-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20428; GFX7:       ; %bb.0: ; %entry
20429; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
20430; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20431; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
20432; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
20433; GFX7-NEXT:    s_mov_b64 s[12:13], 16
20434; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20435; GFX7-NEXT:    s_mov_b32 s6, s4
20436; GFX7-NEXT:    s_mov_b32 s7, s5
20437; GFX7-NEXT:    s_mov_b32 s11, s12
20438; GFX7-NEXT:    s_mov_b32 s10, s13
20439; GFX7-NEXT:    s_add_u32 s6, s6, s11
20440; GFX7-NEXT:    s_addc_u32 s10, s7, s10
20441; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
20442; GFX7-NEXT:    s_mov_b32 s7, s10
20443; GFX7-NEXT:    v_mov_b32_e32 v2, s9
20444; GFX7-NEXT:    v_mov_b32_e32 v0, s8
20445; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20446; GFX7-NEXT:    v_mov_b32_e32 v3, v0
20447; GFX7-NEXT:    v_mov_b32_e32 v0, s6
20448; GFX7-NEXT:    v_mov_b32_e32 v1, s7
20449; GFX7-NEXT:    s_waitcnt vmcnt(0)
20450; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
20451; GFX7-NEXT:    s_waitcnt vmcnt(0)
20452; GFX7-NEXT:    buffer_wbinvl1_vol
20453; GFX7-NEXT:    v_mov_b32_e32 v0, s4
20454; GFX7-NEXT:    v_mov_b32_e32 v1, s5
20455; GFX7-NEXT:    flat_store_dword v[0:1], v2
20456; GFX7-NEXT:    s_endpgm
20457;
20458; GFX10-WGP-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20459; GFX10-WGP:       ; %bb.0: ; %entry
20460; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
20461; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20462; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
20463; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
20464; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20465; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
20466; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
20467; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20468; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
20469; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20470; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20471; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20472; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20473; GFX10-WGP-NEXT:    buffer_gl1_inv
20474; GFX10-WGP-NEXT:    buffer_gl0_inv
20475; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
20476; GFX10-WGP-NEXT:    s_endpgm
20477;
20478; GFX10-CU-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20479; GFX10-CU:       ; %bb.0: ; %entry
20480; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
20481; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20482; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
20483; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
20484; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
20485; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
20486; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
20487; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20488; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
20489; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20490; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20491; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20492; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20493; GFX10-CU-NEXT:    buffer_gl1_inv
20494; GFX10-CU-NEXT:    buffer_gl0_inv
20495; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
20496; GFX10-CU-NEXT:    s_endpgm
20497;
20498; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20499; SKIP-CACHE-INV:       ; %bb.0: ; %entry
20500; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
20501; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
20502; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
20503; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
20504; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
20505; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
20506; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
20507; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
20508; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
20509; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
20510; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
20511; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
20512; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
20513; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
20514; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
20515; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20516; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
20517; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20518; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
20519; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20520; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20521; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20522; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
20523; SKIP-CACHE-INV-NEXT:    s_endpgm
20524;
20525; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20526; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
20527; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20528; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20529; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20530; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20531; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20532; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20533; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20534; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20535; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20536; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
20537; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20538; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20539; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20540; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
20541; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
20542; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20543; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
20544;
20545; GFX90A-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20546; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
20547; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20548; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20549; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20550; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20551; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20552; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20553; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20554; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20555; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20556; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
20557; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20558; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20559; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20560; GFX90A-TGSPLIT-NEXT:    buffer_invl2
20561; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
20562; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20563; GFX90A-TGSPLIT-NEXT:    s_endpgm
20564;
20565; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20566; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
20567; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20568; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20569; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20570; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20571; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20572; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20573; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20574; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20575; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20576; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
20577; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20578; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
20579; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20580; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
20581; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20582; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
20583;
20584; GFX940-TGSPLIT-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20585; GFX940-TGSPLIT:       ; %bb.0: ; %entry
20586; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20587; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20588; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20589; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20590; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20591; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20592; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20593; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20594; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20595; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
20596; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20597; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
20598; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20599; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
20600; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20601; GFX940-TGSPLIT-NEXT:    s_endpgm
20602;
20603; GFX11-WGP-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20604; GFX11-WGP:       ; %bb.0: ; %entry
20605; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
20606; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20607; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20608; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20609; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20610; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
20611; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
20612; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20613; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
20614; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20615; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20616; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20617; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20618; GFX11-WGP-NEXT:    buffer_gl1_inv
20619; GFX11-WGP-NEXT:    buffer_gl0_inv
20620; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20621; GFX11-WGP-NEXT:    s_endpgm
20622;
20623; GFX11-CU-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20624; GFX11-CU:       ; %bb.0: ; %entry
20625; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
20626; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20627; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20628; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20629; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
20630; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
20631; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
20632; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20633; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
20634; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20635; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20636; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20637; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20638; GFX11-CU-NEXT:    buffer_gl1_inv
20639; GFX11-CU-NEXT:    buffer_gl0_inv
20640; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20641; GFX11-CU-NEXT:    s_endpgm
20642;
20643; GFX12-WGP-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20644; GFX12-WGP:       ; %bb.0: ; %entry
20645; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
20646; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20647; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20648; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20649; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
20650; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
20651; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
20652; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20653; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
20654; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
20655; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20656; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20657; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20658; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
20659; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
20660; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20661; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20662; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20663; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
20664; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20665; GFX12-WGP-NEXT:    s_endpgm
20666;
20667; GFX12-CU-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
20668; GFX12-CU:       ; %bb.0: ; %entry
20669; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
20670; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20671; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20672; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20673; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
20674; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
20675; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
20676; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20677; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
20678; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
20679; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20680; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20681; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20682; GFX12-CU-NEXT:    s_wait_storecnt 0x0
20683; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
20684; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20685; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20686; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20687; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
20688; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20689; GFX12-CU-NEXT:    s_endpgm
20690    ptr addrspace(1) %out, i32 %in, i32 %old) {
20691entry:
  ; Address of the i32 at index 4 past %out, i.e. 16 bytes in.
20692  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; Orderings on the cmpxchg are listed success first, then failure:
  ; release on success, seq_cst on failure.
20693  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst
  ; Field 0 of the { i32, i1 } result is the value the cmpxchg read from
  ; memory; the i1 success flag in field 1 is not used by this test.
20694  %val0 = extractvalue { i32, i1 } %val, 0
  ; Storing the returned value keeps it live, so the returning form of the
  ; atomic is what the assertions above check.
20695  store i32 %val0, ptr addrspace(1) %out, align 4
20696  ret void
20697}
20698
20699define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
20700; GFX6-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20701; GFX6:       ; %bb.0: ; %entry
20702; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
20703; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20704; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
20705; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
20706; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
20707; GFX6-NEXT:    s_mov_b32 s12, s5
20708; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
20709; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
20710; GFX6-NEXT:    s_mov_b32 s11, -1
20711; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
20712; GFX6-NEXT:    s_mov_b32 s5, s12
20713; GFX6-NEXT:    s_mov_b32 s6, s11
20714; GFX6-NEXT:    s_mov_b32 s7, s10
20715; GFX6-NEXT:    v_mov_b32_e32 v0, s9
20716; GFX6-NEXT:    v_mov_b32_e32 v2, s8
20717; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20718; GFX6-NEXT:    v_mov_b32_e32 v1, v2
20719; GFX6-NEXT:    s_waitcnt vmcnt(0)
20720; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
20721; GFX6-NEXT:    s_waitcnt vmcnt(0)
20722; GFX6-NEXT:    buffer_wbinvl1
20723; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20724; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
20725; GFX6-NEXT:    s_endpgm
20726;
20727; GFX7-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20728; GFX7:       ; %bb.0: ; %entry
20729; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
20730; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
20731; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
20732; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
20733; GFX7-NEXT:    s_mov_b64 s[12:13], 16
20734; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
20735; GFX7-NEXT:    s_mov_b32 s6, s4
20736; GFX7-NEXT:    s_mov_b32 s7, s5
20737; GFX7-NEXT:    s_mov_b32 s11, s12
20738; GFX7-NEXT:    s_mov_b32 s10, s13
20739; GFX7-NEXT:    s_add_u32 s6, s6, s11
20740; GFX7-NEXT:    s_addc_u32 s10, s7, s10
20741; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
20742; GFX7-NEXT:    s_mov_b32 s7, s10
20743; GFX7-NEXT:    v_mov_b32_e32 v2, s9
20744; GFX7-NEXT:    v_mov_b32_e32 v0, s8
20745; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20746; GFX7-NEXT:    v_mov_b32_e32 v3, v0
20747; GFX7-NEXT:    v_mov_b32_e32 v0, s6
20748; GFX7-NEXT:    v_mov_b32_e32 v1, s7
20749; GFX7-NEXT:    s_waitcnt vmcnt(0)
20750; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
20751; GFX7-NEXT:    s_waitcnt vmcnt(0)
20752; GFX7-NEXT:    buffer_wbinvl1_vol
20753; GFX7-NEXT:    v_mov_b32_e32 v0, s4
20754; GFX7-NEXT:    v_mov_b32_e32 v1, s5
20755; GFX7-NEXT:    flat_store_dword v[0:1], v2
20756; GFX7-NEXT:    s_endpgm
20757;
20758; GFX10-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20759; GFX10-WGP:       ; %bb.0: ; %entry
20760; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
20761; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20762; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
20763; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
20764; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20765; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
20766; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
20767; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20768; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
20769; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20770; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20771; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20772; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
20773; GFX10-WGP-NEXT:    buffer_gl1_inv
20774; GFX10-WGP-NEXT:    buffer_gl0_inv
20775; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
20776; GFX10-WGP-NEXT:    s_endpgm
20777;
20778; GFX10-CU-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20779; GFX10-CU:       ; %bb.0: ; %entry
20780; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
20781; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20782; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
20783; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
20784; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
20785; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
20786; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
20787; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20788; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
20789; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20790; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20791; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
20792; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
20793; GFX10-CU-NEXT:    buffer_gl1_inv
20794; GFX10-CU-NEXT:    buffer_gl0_inv
20795; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
20796; GFX10-CU-NEXT:    s_endpgm
20797;
20798; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20799; SKIP-CACHE-INV:       ; %bb.0: ; %entry
20800; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
20801; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
20802; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
20803; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
20804; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
20805; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
20806; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
20807; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
20808; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
20809; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
20810; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
20811; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
20812; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
20813; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
20814; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
20815; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
20816; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
20817; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20818; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
20819; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20820; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
20821; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
20822; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
20823; SKIP-CACHE-INV-NEXT:    s_endpgm
20824;
20825; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20826; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
20827; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20828; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20829; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20830; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20831; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20832; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20833; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20834; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20835; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20836; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
20837; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20838; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20839; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20840; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
20841; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
20842; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20843; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
20844;
20845; GFX90A-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20846; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
20847; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20848; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
20849; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
20850; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
20851; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20852; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
20853; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
20854; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20855; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20856; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
20857; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20858; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
20859; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20860; GFX90A-TGSPLIT-NEXT:    buffer_invl2
20861; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
20862; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
20863; GFX90A-TGSPLIT-NEXT:    s_endpgm
20864;
20865; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20866; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
20867; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20868; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20869; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20870; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20871; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20872; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20873; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20874; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20875; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20876; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
20877; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20878; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
20879; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20880; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
20881; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20882; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
20883;
20884; GFX940-TGSPLIT-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20885; GFX940-TGSPLIT:       ; %bb.0: ; %entry
20886; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
20887; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
20888; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
20889; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
20890; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
20891; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
20892; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
20893; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
20894; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
20895; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
20896; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20897; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
20898; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
20899; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
20900; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
20901; GFX940-TGSPLIT-NEXT:    s_endpgm
20902;
20903; GFX11-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20904; GFX11-WGP:       ; %bb.0: ; %entry
20905; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
20906; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20907; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20908; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20909; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
20910; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
20911; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
20912; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20913; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
20914; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20915; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
20916; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20917; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
20918; GFX11-WGP-NEXT:    buffer_gl1_inv
20919; GFX11-WGP-NEXT:    buffer_gl0_inv
20920; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20921; GFX11-WGP-NEXT:    s_endpgm
20922;
20923; GFX11-CU-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20924; GFX11-CU:       ; %bb.0: ; %entry
20925; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
20926; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20927; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20928; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20929; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
20930; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
20931; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
20932; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20933; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
20934; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20935; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
20936; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
20937; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
20938; GFX11-CU-NEXT:    buffer_gl1_inv
20939; GFX11-CU-NEXT:    buffer_gl0_inv
20940; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20941; GFX11-CU-NEXT:    s_endpgm
20942;
20943; GFX12-WGP-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20944; GFX12-WGP:       ; %bb.0: ; %entry
20945; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
20946; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20947; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
20948; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
20949; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
20950; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
20951; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
20952; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20953; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
20954; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
20955; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20956; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20957; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20958; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
20959; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
20960; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
20961; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
20962; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
20963; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
20964; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
20965; GFX12-WGP-NEXT:    s_endpgm
20966;
20967; GFX12-CU-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
20968; GFX12-CU:       ; %bb.0: ; %entry
20969; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
20970; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
20971; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
20972; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
20973; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
20974; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
20975; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
20976; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
20977; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
20978; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
20979; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20980; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20981; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20982; GFX12-CU-NEXT:    s_wait_storecnt 0x0
20983; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
20984; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
20985; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
20986; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
20987; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
20988; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
20989; GFX12-CU-NEXT:    s_endpgm
20990    ptr addrspace(1) %out, i32 %in, i32 %old) {
20991entry:
20992  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
20993  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
20994  %val0 = extractvalue { i32, i1 } %val, 0
20995  store i32 %val0, ptr addrspace(1) %out, align 4
20996  ret void
20997}
20998
; Kernel under test: a volatile cmpxchg on a global (addrspace(1)) pointer with
; syncscope("one-as"), using seq_cst for both the success and the failure
; ordering. The i32 member (field 0) of the { i32, i1 } result is stored back
; through %out.
;
; The assertion blocks that follow the define line pin the expected llc output,
; one block per check prefix named on the RUN lines at the top of this file.
; They are autogenerated (see the NOTE on the first line of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing them
; by hand.
20999define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
21000; GFX6-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21001; GFX6:       ; %bb.0: ; %entry
21002; GFX6-NEXT:    s_mov_b64 s[6:7], s[8:9]
21003; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
21004; GFX6-NEXT:    s_load_dword s9, s[6:7], 0x2
21005; GFX6-NEXT:    s_load_dword s8, s[6:7], 0x3
21006; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
21007; GFX6-NEXT:    s_mov_b32 s12, s5
21008; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5
21009; GFX6-NEXT:    s_mov_b32 s10, 0x100f000
21010; GFX6-NEXT:    s_mov_b32 s11, -1
21011; GFX6-NEXT:    ; kill: def $sgpr4 killed $sgpr4 def $sgpr4_sgpr5_sgpr6_sgpr7
21012; GFX6-NEXT:    s_mov_b32 s5, s12
21013; GFX6-NEXT:    s_mov_b32 s6, s11
21014; GFX6-NEXT:    s_mov_b32 s7, s10
21015; GFX6-NEXT:    v_mov_b32_e32 v0, s9
21016; GFX6-NEXT:    v_mov_b32_e32 v2, s8
21017; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
21018; GFX6-NEXT:    v_mov_b32_e32 v1, v2
21019; GFX6-NEXT:    s_waitcnt vmcnt(0)
21020; GFX6-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[4:7], 0 offset:16 glc
21021; GFX6-NEXT:    s_waitcnt vmcnt(0)
21022; GFX6-NEXT:    buffer_wbinvl1
21023; GFX6-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
21024; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
21025; GFX6-NEXT:    s_endpgm
21026;
21027; GFX7-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21028; GFX7:       ; %bb.0: ; %entry
21029; GFX7-NEXT:    s_mov_b64 s[6:7], s[8:9]
21030; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[6:7], 0x0
21031; GFX7-NEXT:    s_load_dword s9, s[6:7], 0x2
21032; GFX7-NEXT:    s_load_dword s8, s[6:7], 0x3
21033; GFX7-NEXT:    s_mov_b64 s[12:13], 16
21034; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
21035; GFX7-NEXT:    s_mov_b32 s6, s4
21036; GFX7-NEXT:    s_mov_b32 s7, s5
21037; GFX7-NEXT:    s_mov_b32 s11, s12
21038; GFX7-NEXT:    s_mov_b32 s10, s13
21039; GFX7-NEXT:    s_add_u32 s6, s6, s11
21040; GFX7-NEXT:    s_addc_u32 s10, s7, s10
21041; GFX7-NEXT:    ; kill: def $sgpr6 killed $sgpr6 def $sgpr6_sgpr7
21042; GFX7-NEXT:    s_mov_b32 s7, s10
21043; GFX7-NEXT:    v_mov_b32_e32 v2, s9
21044; GFX7-NEXT:    v_mov_b32_e32 v0, s8
21045; GFX7-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21046; GFX7-NEXT:    v_mov_b32_e32 v3, v0
21047; GFX7-NEXT:    v_mov_b32_e32 v0, s6
21048; GFX7-NEXT:    v_mov_b32_e32 v1, s7
21049; GFX7-NEXT:    s_waitcnt vmcnt(0)
21050; GFX7-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
21051; GFX7-NEXT:    s_waitcnt vmcnt(0)
21052; GFX7-NEXT:    buffer_wbinvl1_vol
21053; GFX7-NEXT:    v_mov_b32_e32 v0, s4
21054; GFX7-NEXT:    v_mov_b32_e32 v1, s5
21055; GFX7-NEXT:    flat_store_dword v[0:1], v2
21056; GFX7-NEXT:    s_endpgm
21057;
21058; GFX10-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21059; GFX10-WGP:       ; %bb.0: ; %entry
21060; GFX10-WGP-NEXT:    v_mov_b32_e32 v0, 0
21061; GFX10-WGP-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21062; GFX10-WGP-NEXT:    s_load_dword s7, s[8:9], 0x8
21063; GFX10-WGP-NEXT:    s_load_dword s6, s[8:9], 0xc
21064; GFX10-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21065; GFX10-WGP-NEXT:    v_mov_b32_e32 v1, s7
21066; GFX10-WGP-NEXT:    v_mov_b32_e32 v3, s6
21067; GFX10-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21068; GFX10-WGP-NEXT:    v_mov_b32_e32 v2, v3
21069; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
21070; GFX10-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21071; GFX10-WGP-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
21072; GFX10-WGP-NEXT:    s_waitcnt vmcnt(0)
21073; GFX10-WGP-NEXT:    buffer_gl1_inv
21074; GFX10-WGP-NEXT:    buffer_gl0_inv
21075; GFX10-WGP-NEXT:    global_store_dword v0, v1, s[4:5]
21076; GFX10-WGP-NEXT:    s_endpgm
21077;
21078; GFX10-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21079; GFX10-CU:       ; %bb.0: ; %entry
21080; GFX10-CU-NEXT:    v_mov_b32_e32 v0, 0
21081; GFX10-CU-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21082; GFX10-CU-NEXT:    s_load_dword s7, s[8:9], 0x8
21083; GFX10-CU-NEXT:    s_load_dword s6, s[8:9], 0xc
21084; GFX10-CU-NEXT:    s_waitcnt lgkmcnt(0)
21085; GFX10-CU-NEXT:    v_mov_b32_e32 v1, s7
21086; GFX10-CU-NEXT:    v_mov_b32_e32 v3, s6
21087; GFX10-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21088; GFX10-CU-NEXT:    v_mov_b32_e32 v2, v3
21089; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
21090; GFX10-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21091; GFX10-CU-NEXT:    global_atomic_cmpswap v1, v0, v[1:2], s[4:5] offset:16 glc
21092; GFX10-CU-NEXT:    s_waitcnt vmcnt(0)
21093; GFX10-CU-NEXT:    buffer_gl1_inv
21094; GFX10-CU-NEXT:    buffer_gl0_inv
21095; GFX10-CU-NEXT:    global_store_dword v0, v1, s[4:5]
21096; GFX10-CU-NEXT:    s_endpgm
21097;
21098; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21099; SKIP-CACHE-INV:       ; %bb.0: ; %entry
21100; SKIP-CACHE-INV-NEXT:    s_mov_b64 s[2:3], s[4:5]
21101; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x0
21102; SKIP-CACHE-INV-NEXT:    s_load_dword s5, s[2:3], 0x2
21103; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[2:3], 0x3
21104; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
21105; SKIP-CACHE-INV-NEXT:    s_mov_b32 s8, s1
21106; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1
21107; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0xf000
21108; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, -1
21109; SKIP-CACHE-INV-NEXT:    ; kill: def $sgpr0 killed $sgpr0 def $sgpr0_sgpr1_sgpr2_sgpr3
21110; SKIP-CACHE-INV-NEXT:    s_mov_b32 s1, s8
21111; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, s7
21112; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, s6
21113; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s5
21114; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s4
21115; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec
21116; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, v2
21117; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21118; SKIP-CACHE-INV-NEXT:    buffer_atomic_cmpswap v[0:1], off, s[0:3], 0 offset:16 glc
21119; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21120; SKIP-CACHE-INV-NEXT:    ; kill: def $vgpr0 killed $vgpr0 killed $vgpr0_vgpr1 killed $exec
21121; SKIP-CACHE-INV-NEXT:    s_waitcnt vmcnt(0)
21122; SKIP-CACHE-INV-NEXT:    buffer_store_dword v0, off, s[0:3], 0
21123; SKIP-CACHE-INV-NEXT:    s_endpgm
21124;
21125; GFX90A-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21126; GFX90A-NOTTGSPLIT:       ; %bb.0: ; %entry
21127; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21128; GFX90A-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21129; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21130; GFX90A-NOTTGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21131; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21132; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21133; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21134; GFX90A-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21135; GFX90A-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21136; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbl2
21137; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21138; GFX90A-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21139; GFX90A-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21140; GFX90A-NOTTGSPLIT-NEXT:    buffer_invl2
21141; GFX90A-NOTTGSPLIT-NEXT:    buffer_wbinvl1_vol
21142; GFX90A-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21143; GFX90A-NOTTGSPLIT-NEXT:    s_endpgm
21144;
21145; GFX90A-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21146; GFX90A-TGSPLIT:       ; %bb.0: ; %entry
21147; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21148; GFX90A-TGSPLIT-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
21149; GFX90A-TGSPLIT-NEXT:    s_load_dword s7, s[8:9], 0x8
21150; GFX90A-TGSPLIT-NEXT:    s_load_dword s6, s[8:9], 0xc
21151; GFX90A-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21152; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s7
21153; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s6
21154; GFX90A-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21155; GFX90A-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21156; GFX90A-TGSPLIT-NEXT:    buffer_wbl2
21157; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21158; GFX90A-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[4:5] offset:16 glc
21159; GFX90A-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21160; GFX90A-TGSPLIT-NEXT:    buffer_invl2
21161; GFX90A-TGSPLIT-NEXT:    buffer_wbinvl1_vol
21162; GFX90A-TGSPLIT-NEXT:    global_store_dword v0, v1, s[4:5]
21163; GFX90A-TGSPLIT-NEXT:    s_endpgm
21164;
21165; GFX940-NOTTGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21166; GFX940-NOTTGSPLIT:       ; %bb.0: ; %entry
21167; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21168; GFX940-NOTTGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21169; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21170; GFX940-NOTTGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21171; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21172; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21173; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21174; GFX940-NOTTGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21175; GFX940-NOTTGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21176; GFX940-NOTTGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
21177; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21178; GFX940-NOTTGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
21179; GFX940-NOTTGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21180; GFX940-NOTTGSPLIT-NEXT:    buffer_inv sc0 sc1
21181; GFX940-NOTTGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21182; GFX940-NOTTGSPLIT-NEXT:    s_endpgm
21183;
21184; GFX940-TGSPLIT-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21185; GFX940-TGSPLIT:       ; %bb.0: ; %entry
21186; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v0, 0
21187; GFX940-TGSPLIT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
21188; GFX940-TGSPLIT-NEXT:    s_load_dword s3, s[4:5], 0x8
21189; GFX940-TGSPLIT-NEXT:    s_load_dword s2, s[4:5], 0xc
21190; GFX940-TGSPLIT-NEXT:    s_waitcnt lgkmcnt(0)
21191; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v2, s3
21192; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v1, s2
21193; GFX940-TGSPLIT-NEXT:    ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
21194; GFX940-TGSPLIT-NEXT:    v_mov_b32_e32 v3, v1
21195; GFX940-TGSPLIT-NEXT:    buffer_wbl2 sc0 sc1
21196; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21197; GFX940-TGSPLIT-NEXT:    global_atomic_cmpswap v1, v0, v[2:3], s[0:1] offset:16 sc0 sc1
21198; GFX940-TGSPLIT-NEXT:    s_waitcnt vmcnt(0)
21199; GFX940-TGSPLIT-NEXT:    buffer_inv sc0 sc1
21200; GFX940-TGSPLIT-NEXT:    global_store_dword v0, v1, s[0:1] sc0 sc1
21201; GFX940-TGSPLIT-NEXT:    s_endpgm
21202;
21203; GFX11-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21204; GFX11-WGP:       ; %bb.0: ; %entry
21205; GFX11-WGP-NEXT:    v_mov_b32_e32 v0, 0
21206; GFX11-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21207; GFX11-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21208; GFX11-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21209; GFX11-WGP-NEXT:    s_waitcnt lgkmcnt(0)
21210; GFX11-WGP-NEXT:    v_mov_b32_e32 v1, s3
21211; GFX11-WGP-NEXT:    v_mov_b32_e32 v3, s2
21212; GFX11-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21213; GFX11-WGP-NEXT:    v_mov_b32_e32 v2, v3
21214; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21215; GFX11-WGP-NEXT:    s_waitcnt_vscnt null, 0x0
21216; GFX11-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21217; GFX11-WGP-NEXT:    s_waitcnt vmcnt(0)
21218; GFX11-WGP-NEXT:    buffer_gl1_inv
21219; GFX11-WGP-NEXT:    buffer_gl0_inv
21220; GFX11-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21221; GFX11-WGP-NEXT:    s_endpgm
21222;
21223; GFX11-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21224; GFX11-CU:       ; %bb.0: ; %entry
21225; GFX11-CU-NEXT:    v_mov_b32_e32 v0, 0
21226; GFX11-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21227; GFX11-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21228; GFX11-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21229; GFX11-CU-NEXT:    s_waitcnt lgkmcnt(0)
21230; GFX11-CU-NEXT:    v_mov_b32_e32 v1, s3
21231; GFX11-CU-NEXT:    v_mov_b32_e32 v3, s2
21232; GFX11-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21233; GFX11-CU-NEXT:    v_mov_b32_e32 v2, v3
21234; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21235; GFX11-CU-NEXT:    s_waitcnt_vscnt null, 0x0
21236; GFX11-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 glc
21237; GFX11-CU-NEXT:    s_waitcnt vmcnt(0)
21238; GFX11-CU-NEXT:    buffer_gl1_inv
21239; GFX11-CU-NEXT:    buffer_gl0_inv
21240; GFX11-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21241; GFX11-CU-NEXT:    s_endpgm
21242;
21243; GFX12-WGP-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21244; GFX12-WGP:       ; %bb.0: ; %entry
21245; GFX12-WGP-NEXT:    v_mov_b32_e32 v0, 0
21246; GFX12-WGP-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21247; GFX12-WGP-NEXT:    s_load_b32 s3, s[4:5], 0x8
21248; GFX12-WGP-NEXT:    s_load_b32 s2, s[4:5], 0xc
21249; GFX12-WGP-NEXT:    s_wait_kmcnt 0x0
21250; GFX12-WGP-NEXT:    v_mov_b32_e32 v1, s3
21251; GFX12-WGP-NEXT:    v_mov_b32_e32 v3, s2
21252; GFX12-WGP-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21253; GFX12-WGP-NEXT:    v_mov_b32_e32 v2, v3
21254; GFX12-WGP-NEXT:    global_wb scope:SCOPE_SYS
21255; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
21256; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
21257; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21258; GFX12-WGP-NEXT:    s_wait_storecnt 0x0
21259; GFX12-WGP-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
21260; GFX12-WGP-NEXT:    s_wait_bvhcnt 0x0
21261; GFX12-WGP-NEXT:    s_wait_samplecnt 0x0
21262; GFX12-WGP-NEXT:    s_wait_loadcnt 0x0
21263; GFX12-WGP-NEXT:    global_inv scope:SCOPE_SYS
21264; GFX12-WGP-NEXT:    global_store_b32 v0, v1, s[0:1]
21265; GFX12-WGP-NEXT:    s_endpgm
21266;
21267; GFX12-CU-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
21268; GFX12-CU:       ; %bb.0: ; %entry
21269; GFX12-CU-NEXT:    v_mov_b32_e32 v0, 0
21270; GFX12-CU-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21271; GFX12-CU-NEXT:    s_load_b32 s3, s[4:5], 0x8
21272; GFX12-CU-NEXT:    s_load_b32 s2, s[4:5], 0xc
21273; GFX12-CU-NEXT:    s_wait_kmcnt 0x0
21274; GFX12-CU-NEXT:    v_mov_b32_e32 v1, s3
21275; GFX12-CU-NEXT:    v_mov_b32_e32 v3, s2
21276; GFX12-CU-NEXT:    ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec
21277; GFX12-CU-NEXT:    v_mov_b32_e32 v2, v3
21278; GFX12-CU-NEXT:    global_wb scope:SCOPE_SYS
21279; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
21280; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
21281; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
21282; GFX12-CU-NEXT:    s_wait_storecnt 0x0
21283; GFX12-CU-NEXT:    global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS
21284; GFX12-CU-NEXT:    s_wait_bvhcnt 0x0
21285; GFX12-CU-NEXT:    s_wait_samplecnt 0x0
21286; GFX12-CU-NEXT:    s_wait_loadcnt 0x0
21287; GFX12-CU-NEXT:    global_inv scope:SCOPE_SYS
21288; GFX12-CU-NEXT:    global_store_b32 v0, v1, s[0:1]
21289; GFX12-CU-NEXT:    s_endpgm
21290    ptr addrspace(1) %out, i32 %in, i32 %old) {
21291entry:
  ; Operate on the i32 at element index 4 from %out (4 x i32 = 16 bytes past
  ; %out), so the atomic and the result store below use different addresses.
21292  %gep = getelementptr i32, ptr addrspace(1) %out, i32 4
  ; The operation under test: compare against %old and swap in %in, with
  ; seq_cst ordering on both the success and the failure path.
21293  %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
  ; Keep only the i32 member of the result pair; the i1 member is unused.
21294  %val0 = extractvalue { i32, i1 } %val, 0
  ; Storing the result makes this the value-returning ("ret") cmpxchg variant.
21295  store i32 %val0, ptr addrspace(1) %out, align 4
21296  ret void
21297}
21298