xref: /llvm-project/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll (revision eeac0ffaf46cf9f9b0f680b9940cc4b68a0286d8)
1b0a25468SMatt Arsenault; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
25a3299a6SMatt Arsenault; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GFX7 %s
35a3299a6SMatt Arsenault; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s
45a3299a6SMatt Arsenault; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
5b0a25468SMatt Arsenault
6b0a25468SMatt Arsenault; ---------------------------------------------------------------------
7b0a25468SMatt Arsenault; atomicrmw xchg
8b0a25468SMatt Arsenault; ---------------------------------------------------------------------
9b0a25468SMatt Arsenault
; Test: seq_cst i64 `atomicrmw xchg` through a flat pointer (%ptr) whose result
; (%tmp0) is never used. No syncscope is given on the atomicrmw.
; The GFX7, GFX8 and GFX9 checks all expect the same sequence, namely a
; flat_atomic_swap_x2 without the glc modifier, then s_waitcnt and
; buffer_wbinvl1_vol before the return.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view; the file name suggests it excludes the private address space -- confirm.
10b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_i64_noret(ptr %ptr, i64 %in) {
11b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret:
12b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
13b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
15b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
16b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
17b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
18b0a25468SMatt Arsenault;
19b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret:
20b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
21b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
23b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
24b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
25b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
26b0a25468SMatt Arsenault;
27b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret:
28b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
29b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
31b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
32b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
33b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
34b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
35b0a25468SMatt Arsenault  ret void
36b0a25468SMatt Arsenault}
37b0a25468SMatt Arsenault
; Test: same unused-result seq_cst i64 xchg, but the address is %out advanced
; by four i64 elements (a 32-byte displacement).
; The GFX7 and GFX8 checks expect the displacement to be applied with an
; explicit 64-bit add (low-half add of 32, then add-with-carry of 0 on the high
; half) before the atomic. The GFX9 checks expect it to be folded into the
; instruction as `offset:32` with no separate add.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
38b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_i64_noret_offset(ptr %out, i64 %in) {
39b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret_offset:
40b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
41b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
43b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
44b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
45b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
46b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
47b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
48b0a25468SMatt Arsenault;
49b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret_offset:
50b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
51b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
53b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
54b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
55b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
56b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
57b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
58b0a25468SMatt Arsenault;
59b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret_offset:
60b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
61b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3] offset:32
63b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
64b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
65b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
66b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
67b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
68b0a25468SMatt Arsenault  ret void
69b0a25468SMatt Arsenault}
70b0a25468SMatt Arsenault
; Test: seq_cst i64 `atomicrmw xchg` through a flat pointer (%ptr) where the
; value produced by the atomic (%result) is returned to the caller.
; The GFX7, GFX8 and GFX9 checks all expect the returning form of
; flat_atomic_swap_x2, which has a destination register pair (v[0:1]) and the
; glc modifier, then s_waitcnt and buffer_wbinvl1_vol before the return.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
71b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xchg_i64_ret(ptr %ptr, i64 %in) {
72b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret:
73b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
74b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
76b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
77b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
78b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
79b0a25468SMatt Arsenault;
80b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret:
81b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
82b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
84b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
85b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
86b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
87b0a25468SMatt Arsenault;
88b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret:
89b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
90b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
92b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
93b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
94b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
95b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
96b0a25468SMatt Arsenault  ret i64 %result
97b0a25468SMatt Arsenault}
98b0a25468SMatt Arsenault
; Test: returning seq_cst i64 xchg at %out advanced by four i64 elements (a
; 32-byte displacement); %result is returned to the caller.
; The GFX7 and GFX8 checks expect an explicit 64-bit add of 32 to the address
; register pair before a `glc` flat_atomic_swap_x2. The GFX9 checks expect the
; displacement folded into the instruction as `offset:32 glc`.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
99b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xchg_i64_ret_offset(ptr %out, i64 %in) {
100b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret_offset:
101b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
102b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
104b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
105b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
106b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
107b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
108b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
109b0a25468SMatt Arsenault;
110b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret_offset:
111b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
112b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
114b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
115b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
116b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
117b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
118b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
119b0a25468SMatt Arsenault;
120b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret_offset:
121b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
122b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
124b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
125b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
126b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
127b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
128b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
129b0a25468SMatt Arsenault  ret i64 %result
130b0a25468SMatt Arsenault}
131b0a25468SMatt Arsenault
; Test: unused-result seq_cst i64 xchg in an `amdgpu_gfx` function whose pointer
; and data arguments are both marked `inreg`.
; The GFX7, GFX8 and GFX9 checks all expect s6/s7 to be copied into v0/v1 and
; s4/s5 into v2/v3 with v_mov_b32 before a flat_atomic_swap_x2 on
; v[2:3], v[0:1] (no glc modifier).
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
132b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xchg_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
133b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret_scalar:
134b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
135b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
137b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
138b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
139b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
140b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
141b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
142b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
143b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
144b0a25468SMatt Arsenault;
145b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret_scalar:
146b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
147b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
149b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
150b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
151b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
152b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
153b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
154b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
155b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
156b0a25468SMatt Arsenault;
157b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret_scalar:
158b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
159b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
161b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
162b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
163b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
164b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
165b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
166b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
167b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
168b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
169b0a25468SMatt Arsenault  ret void
170b0a25468SMatt Arsenault}
171b0a25468SMatt Arsenault
; Test: unused-result seq_cst i64 xchg in an `amdgpu_gfx` function with `inreg`
; arguments, addressed at %out advanced by four i64 elements (32 bytes).
; The GFX7 and GFX8 checks expect the displacement to be added on the scalar
; side (s_add_u32 / s_addc_u32 of s4/s5 into s34/s35) and the sum copied into
; v2/v3. The GFX9 checks expect s4/s5 to be copied unchanged and the
; displacement carried by `offset:32` on the atomic.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
172b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xchg_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
173b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret_offset_scalar:
174b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
175b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
177b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
178b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
179b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
180b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
181b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
182b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
183b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
184b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
185b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
186b0a25468SMatt Arsenault;
187b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret_offset_scalar:
188b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
189b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
191b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
192b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
193b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
194b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
195b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
196b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
197b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
198b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
199b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
200b0a25468SMatt Arsenault;
201b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret_offset_scalar:
202b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
203b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
204b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
205b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
206b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
207b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
208b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1] offset:32
209b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
210b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
211b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
212b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
213b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
214b0a25468SMatt Arsenault  ret void
215b0a25468SMatt Arsenault}
216b0a25468SMatt Arsenault
; Test: returning seq_cst i64 xchg in an `amdgpu_gfx` function whose pointer and
; data arguments are both marked `inreg`; %result is returned to the caller.
; The GFX7, GFX8 and GFX9 checks all expect s6/s7 to be copied into v0/v1 and
; s4/s5 into v2/v3, followed by a `glc` flat_atomic_swap_x2 whose destination
; pair is v[0:1].
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
217b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_xchg_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
218b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret_scalar:
219b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
220b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
222b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
223b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
224b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
225b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
226b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
227b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
228b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
229b0a25468SMatt Arsenault;
230b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret_scalar:
231b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
232b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
234b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
235b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
236b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
237b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
238b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
239b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
240b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
241b0a25468SMatt Arsenault;
242b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret_scalar:
243b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
244b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
246b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
247b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
248b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
249b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
250b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
251b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
252b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
253b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
254b0a25468SMatt Arsenault  ret i64 %result
255b0a25468SMatt Arsenault}
256b0a25468SMatt Arsenault
; Test: returning seq_cst i64 xchg in an `amdgpu_gfx` function with `inreg`
; arguments, addressed at %out advanced by four i64 elements (32 bytes);
; %result is returned to the caller.
; The GFX7 and GFX8 checks expect the displacement to be added on the scalar
; side (s_add_u32 / s_addc_u32 of s4/s5 into s34/s35) before the copy into
; v2/v3. The GFX9 checks expect s4/s5 to be copied unchanged and the
; displacement carried by `offset:32 glc` on the atomic.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
257b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_xchg_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
258b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret_offset_scalar:
259b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
260b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
262b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
263b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
264b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
265b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
266b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
267b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
268b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
269b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
270b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
271b0a25468SMatt Arsenault;
272b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret_offset_scalar:
273b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
274b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
276b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
277b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
278b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
279b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
280b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
281b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
282b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
283b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
284b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
285b0a25468SMatt Arsenault;
286b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret_offset_scalar:
287b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
288b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
290b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
291b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
292b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
293b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
294b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
295b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
296b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
297b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
298b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
299b0a25468SMatt Arsenault  ret i64 %result
300b0a25468SMatt Arsenault}
301b0a25468SMatt Arsenault
; Test: unused-result seq_cst i64 xchg at %out + 32 bytes, with
; !amdgpu.no.remote.memory attached to the atomicrmw in addition to
; !noalias.addrspace.
; The expected instructions for all three targets are identical to those of
; @flat_atomic_xchg_i64_noret_offset, so the checks show the extra metadata
; leaving the generated code for xchg unchanged.
; NOTE(review): !0 and !1 are defined outside this view -- confirm their
; contents there.
302b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
303b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory:
304b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
305b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
307b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
308b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
309b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
310b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
311b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
312b0a25468SMatt Arsenault;
313b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory:
314b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
315b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
317b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
318b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
319b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
320b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
321b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
322b0a25468SMatt Arsenault;
323b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory:
324b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
325b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3] offset:32
327b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
328b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
329b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
330b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
331b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
332b0a25468SMatt Arsenault  ret void
333b0a25468SMatt Arsenault}
334b0a25468SMatt Arsenault
; Test: returning seq_cst i64 xchg at %out + 32 bytes, with
; !amdgpu.no.remote.memory attached to the atomicrmw in addition to
; !noalias.addrspace; %result is returned to the caller.
; The expected instructions for all three targets are identical to those of
; @flat_atomic_xchg_i64_ret_offset, so the checks show the extra metadata
; leaving the generated code for xchg unchanged.
; NOTE(review): !0 and !1 are defined outside this view -- confirm their
; contents there.
335b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
336b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory:
337b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
338b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
340b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
341b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
342b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
343b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
344b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
345b0a25468SMatt Arsenault;
346b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory:
347b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
348b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
350b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
351b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
352b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
353b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
354b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
355b0a25468SMatt Arsenault;
356b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory:
357b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
358b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
360b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
361b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
362b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
363b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
364b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
365b0a25468SMatt Arsenault  ret i64 %result
366b0a25468SMatt Arsenault}
367b0a25468SMatt Arsenault
368b0a25468SMatt Arsenault; ---------------------------------------------------------------------
369b0a25468SMatt Arsenault; atomicrmw xchg f64
370b0a25468SMatt Arsenault; ---------------------------------------------------------------------
371b0a25468SMatt Arsenault
; Test: seq_cst `atomicrmw xchg` of a double through a flat pointer (%ptr) whose
; result (%tmp0) is never used.
; The GFX7, GFX8 and GFX9 checks all expect a flat_atomic_swap_x2 without the
; glc modifier, then s_waitcnt and buffer_wbinvl1_vol before the return. This is
; the same instruction sequence the checks for @flat_atomic_xchg_i64_noret
; expect for the i64 exchange.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
372b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_f64_noret(ptr %ptr, double %in) {
373b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret:
374b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
375b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
376b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
377b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
378b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
379b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
380b0a25468SMatt Arsenault;
381b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret:
382b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
383b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
385b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
386b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
387b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
388b0a25468SMatt Arsenault;
389b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret:
390b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
391b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
393b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
394b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
395b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
396b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %ptr, double %in seq_cst, !noalias.addrspace !1
397b0a25468SMatt Arsenault  ret void
398b0a25468SMatt Arsenault}
399b0a25468SMatt Arsenault
; Test: unused-result seq_cst xchg of a double at %out advanced by four double
; elements (a 32-byte displacement; the GEP index here is an i32).
; The GFX7 and GFX8 checks expect an explicit 64-bit add of 32 to the address
; register pair before the atomic. The GFX9 checks expect the displacement
; folded into the instruction as `offset:32`.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
400b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_f64_noret_offset(ptr %out, double %in) {
401b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret_offset:
402b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
403b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
405b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
406b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
407b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
408b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
409b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
410b0a25468SMatt Arsenault;
411b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret_offset:
412b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
413b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
415b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
416b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
417b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
418b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
419b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
420b0a25468SMatt Arsenault;
421b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret_offset:
422b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
423b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3] offset:32
425b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
426b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
427b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
428b0a25468SMatt Arsenault  %gep = getelementptr double, ptr %out, i32 4
429b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1
430b0a25468SMatt Arsenault  ret void
431b0a25468SMatt Arsenault}
432b0a25468SMatt Arsenault
; Test: seq_cst `atomicrmw xchg` of a double through a flat pointer (%ptr) where
; the value produced by the atomic (%result) is returned to the caller.
; The GFX7, GFX8 and GFX9 checks all expect the returning form of
; flat_atomic_swap_x2, which has a destination register pair (v[0:1]) and the
; glc modifier, then s_waitcnt and buffer_wbinvl1_vol before the return.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
433b0a25468SMatt Arsenaultdefine double @flat_atomic_xchg_f64_ret(ptr %ptr, double %in) {
434b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret:
435b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
436b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
438b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
439b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
440b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
441b0a25468SMatt Arsenault;
442b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret:
443b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
444b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
446b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
447b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
448b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
449b0a25468SMatt Arsenault;
450b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret:
451b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
452b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
454b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
455b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
456b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
457b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %ptr, double %in seq_cst, !noalias.addrspace !1
458b0a25468SMatt Arsenault  ret double %result
459b0a25468SMatt Arsenault}
460b0a25468SMatt Arsenault
; Test: returning seq_cst xchg of a double at %out advanced by four double
; elements (a 32-byte displacement; the GEP index here is an i32); %result is
; returned to the caller.
; The GFX7 and GFX8 checks expect an explicit 64-bit add of 32 to the address
; register pair before a `glc` flat_atomic_swap_x2. The GFX9 checks expect the
; displacement folded into the instruction as `offset:32 glc`.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view -- confirm its contents there.
461b0a25468SMatt Arsenaultdefine double @flat_atomic_xchg_f64_ret_offset(ptr %out, double %in) {
462b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret_offset:
463b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
464b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
466b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
467b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
468b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
469b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
470b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
471b0a25468SMatt Arsenault;
472b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret_offset:
473b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
474b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
476b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
477b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
478b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
479b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
480b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
481b0a25468SMatt Arsenault;
482b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret_offset:
483b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
484b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
486b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
487b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
488b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
489b0a25468SMatt Arsenault  %gep = getelementptr double, ptr %out, i32 4
490b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1
491b0a25468SMatt Arsenault  ret double %result
492b0a25468SMatt Arsenault}
493b0a25468SMatt Arsenault
; Non-returning seq_cst xchg of a double where both the pointer and the value
; are inreg arguments of an amdgpu_gfx function. On all three targets the
; checks copy s[4:5] into v[2:3] (the address operand) and s[6:7] into v[0:1]
; (the data operand) with v_mov_b32, then issue flat_atomic_swap_x2 without
; glc. The IR result %tmp0 is unused.
494b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xchg_f64_noret_scalar(ptr inreg %ptr, double inreg %in) {
495b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret_scalar:
496b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
497b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
499b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
500b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
501b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
502b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
503b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
504b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
505b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
506b0a25468SMatt Arsenault;
507b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret_scalar:
508b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
509b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
511b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
512b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
513b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
514b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
515b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
516b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
517b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
518b0a25468SMatt Arsenault;
519b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret_scalar:
520b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
521b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
522b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
523b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
524b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
525b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
526b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
527b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
528b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
529b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
530b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %ptr, double %in seq_cst, !noalias.addrspace !1
531b0a25468SMatt Arsenault  ret void
532b0a25468SMatt Arsenault}
533b0a25468SMatt Arsenault
; Non-returning seq_cst xchg of a double at %out + 4 doubles (32 bytes), with
; inreg pointer and value in an amdgpu_gfx function. The GFX7 and GFX8 checks
; form the address with scalar adds (s_add_u32/s_addc_u32 into s[34:35]) and
; then copy it to v[2:3]; the GFX9 checks copy s[4:5] unchanged and encode the
; displacement as offset:32 on the atomic. The IR result %tmp0 is unused.
534b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xchg_f64_noret_offset_scalar(ptr inreg %out, double inreg %in) {
535b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret_offset_scalar:
536b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
537b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
539b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
540b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
541b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
542b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
543b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
544b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
545b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
546b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
547b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
548b0a25468SMatt Arsenault;
549b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret_offset_scalar:
550b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
551b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
553b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
554b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
555b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
556b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
557b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
558b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1]
559b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
560b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
561b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
562b0a25468SMatt Arsenault;
563b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret_offset_scalar:
564b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
565b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
567b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
568b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
569b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
570b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[2:3], v[0:1] offset:32
571b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
572b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
573b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
574b0a25468SMatt Arsenault  %gep = getelementptr double, ptr %out, i32 4
575b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1
576b0a25468SMatt Arsenault  ret void
577b0a25468SMatt Arsenault}
578b0a25468SMatt Arsenault
; Returning seq_cst xchg of a double where both the pointer and the value are
; inreg arguments of an amdgpu_gfx function. On all three targets the checks
; copy s[6:7] into v[0:1] and s[4:5] into v[2:3], then expect the glc form of
; flat_atomic_swap_x2 with v[0:1] serving as both the data operand and the
; destination of the old value.
579b0a25468SMatt Arsenaultdefine amdgpu_gfx double @flat_atomic_xchg_f64_ret_scalar(ptr inreg %ptr, double inreg %in) {
580b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret_scalar:
581b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
582b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
584b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
585b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
586b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
587b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
588b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
589b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
590b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
591b0a25468SMatt Arsenault;
592b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret_scalar:
593b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
594b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
596b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
597b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
598b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
599b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
600b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
601b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
602b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
603b0a25468SMatt Arsenault;
604b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret_scalar:
605b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
606b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
607b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
608b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
609b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
610b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
611b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
612b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
613b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
614b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
615b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %ptr, double %in seq_cst, !noalias.addrspace !1
616b0a25468SMatt Arsenault  ret double %result
617b0a25468SMatt Arsenault}
618b0a25468SMatt Arsenault
; Returning seq_cst xchg of a double at %out + 4 doubles (32 bytes), with
; inreg pointer and value in an amdgpu_gfx function. The GFX7 and GFX8 checks
; form the address with s_add_u32/s_addc_u32 into s[34:35] before moving it
; to v[2:3]; the GFX9 checks move s[4:5] unchanged and use offset:32. All
; three expect the glc form with the old value written to v[0:1].
619b0a25468SMatt Arsenaultdefine amdgpu_gfx double @flat_atomic_xchg_f64_ret_offset_scalar(ptr inreg %out, double inreg %in) {
620b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret_offset_scalar:
621b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
622b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
624b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
625b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
626b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
627b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
628b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
629b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
630b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
631b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
632b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
633b0a25468SMatt Arsenault;
634b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret_offset_scalar:
635b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
636b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
638b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
639b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
640b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
641b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
642b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
643b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] glc
644b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
645b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
646b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
647b0a25468SMatt Arsenault;
648b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret_offset_scalar:
649b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
650b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
651b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
652b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
653b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
654b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
655b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
656b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
657b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
658b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
659b0a25468SMatt Arsenault  %gep = getelementptr double, ptr %out, i32 4
660b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1
661b0a25468SMatt Arsenault  ret double %result
662b0a25468SMatt Arsenault}
663b0a25468SMatt Arsenault
; Non-returning seq_cst xchg of a double at %out + 4 doubles (32 bytes),
; additionally tagged with !amdgpu.no.remote.memory. The checks expect a
; single flat_atomic_swap_x2 without glc on every target, with the offset
; applied by a VALU add/addc pair on GFX7 and GFX8 and by offset:32 on GFX9.
; NOTE(review): !0 and !1 are defined outside this chunk; their contents are
; not visible here.
664b0a25468SMatt Arsenaultdefine void @flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory(ptr %out, double %in) {
665b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory:
666b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
667b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
669b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
670b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
671b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
672b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
673b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
674b0a25468SMatt Arsenault;
675b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory:
676b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
677b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
679b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
680b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3]
681b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
682b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
683b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
684b0a25468SMatt Arsenault;
685b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory:
686b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
687b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[2:3] offset:32
689b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
690b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
691b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
692b0a25468SMatt Arsenault  %gep = getelementptr double, ptr %out, i64 4
693b0a25468SMatt Arsenault  %tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
694b0a25468SMatt Arsenault  ret void
695b0a25468SMatt Arsenault}
696b0a25468SMatt Arsenault
; Returning seq_cst xchg of a double at %out + 4 doubles (32 bytes),
; additionally tagged with !amdgpu.no.remote.memory. The expected instruction
; sequences are the same as those checked for flat_atomic_xchg_f64_ret_offset,
; which lacks that metadata -- VALU add/addc plus a glc swap on GFX7 and GFX8,
; and a glc swap with offset:32 on GFX9.
; NOTE(review): !0 and !1 are defined outside this chunk; their contents are
; not visible here.
697b0a25468SMatt Arsenaultdefine double @flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory(ptr %out, double %in) {
698b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory:
699b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
700b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
701b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
702b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
703b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
704b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
705b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
706b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
707b0a25468SMatt Arsenault;
708b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory:
709b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
710b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
711b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
712b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
713b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] glc
714b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
715b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
716b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
717b0a25468SMatt Arsenault;
718b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory:
719b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
720b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_swap_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
722b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
723b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
724b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
725b0a25468SMatt Arsenault  %gep = getelementptr double, ptr %out, i64 4
726b0a25468SMatt Arsenault  %result = atomicrmw xchg ptr %gep, double %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
727b0a25468SMatt Arsenault  ret double %result
728b0a25468SMatt Arsenault}
729b0a25468SMatt Arsenault
730b0a25468SMatt Arsenault; ---------------------------------------------------------------------
731b0a25468SMatt Arsenault; atomicrmw add
732b0a25468SMatt Arsenault; ---------------------------------------------------------------------
733b0a25468SMatt Arsenault
; Non-returning seq_cst i64 add through a flat pointer with no offset. All
; three targets are expected to emit one flat_atomic_add_x2 without glc,
; taking the address from v[0:1] and the addend from v[2:3]. The IR result
; %tmp0 is unused.
; NOTE(review): !1 is defined outside this chunk -- presumably it excludes the
; private address space, going by the test file name; confirm.
734b0a25468SMatt Arsenaultdefine void @flat_atomic_add_i64_noret(ptr %ptr, i64 %in) {
735b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret:
736b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
737b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3]
739b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
740b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
741b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
742b0a25468SMatt Arsenault;
743b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret:
744b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
745b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
746b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3]
747b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
748b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
749b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
750b0a25468SMatt Arsenault;
751b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret:
752b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
753b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3]
755b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
756b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
757b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
758b0a25468SMatt Arsenault  %tmp0 = atomicrmw add ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
759b0a25468SMatt Arsenault  ret void
760b0a25468SMatt Arsenault}
761b0a25468SMatt Arsenault
; Non-returning seq_cst i64 add at %out + 4 i64 elements (32 bytes). The GFX7
; and GFX8 checks add 32 to the address in v[0:1] with a VALU add/addc pair
; before the atomic; the GFX9 checks fold it into flat_atomic_add_x2 as
; offset:32. None of the variants use glc. The IR result %tmp0 is unused.
762b0a25468SMatt Arsenaultdefine void @flat_atomic_add_i64_noret_offset(ptr %out, i64 %in) {
763b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret_offset:
764b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
765b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
766b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
767b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
768b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3]
769b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
770b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
771b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
772b0a25468SMatt Arsenault;
773b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret_offset:
774b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
775b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
776b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
777b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
778b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3]
779b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
780b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
781b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
782b0a25468SMatt Arsenault;
783b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret_offset:
784b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
785b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3] offset:32
787b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
788b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
789b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
790b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
791b0a25468SMatt Arsenault  %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
792b0a25468SMatt Arsenault  ret void
793b0a25468SMatt Arsenault}
794b0a25468SMatt Arsenault
; Returning seq_cst i64 add through a flat pointer with no offset. All three
; targets are expected to emit one glc flat_atomic_add_x2 that takes the
; address from v[0:1] and the addend from v[2:3], and writes the old value
; back to v[0:1].
795b0a25468SMatt Arsenaultdefine i64 @flat_atomic_add_i64_ret(ptr %ptr, i64 %in) {
796b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret:
797b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
798b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
799b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc
800b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
801b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
802b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
803b0a25468SMatt Arsenault;
804b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret:
805b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
806b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
807b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc
808b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
809b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
810b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
811b0a25468SMatt Arsenault;
812b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret:
813b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
814b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc
816b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
817b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
818b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
819b0a25468SMatt Arsenault  %result = atomicrmw add ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
820b0a25468SMatt Arsenault  ret i64 %result
821b0a25468SMatt Arsenault}
822b0a25468SMatt Arsenault
; Returning seq_cst i64 add at %out + 4 i64 elements (32 bytes). The GFX7 and
; GFX8 checks add 32 to the address in v[0:1] with a VALU add/addc pair; the
; GFX9 checks fold it into the atomic as offset:32. All three expect the glc
; form of flat_atomic_add_x2 with the old value written to v[0:1].
823b0a25468SMatt Arsenaultdefine i64 @flat_atomic_add_i64_ret_offset(ptr %out, i64 %in) {
824b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret_offset:
825b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
826b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
827b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
828b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
829b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc
830b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
831b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
832b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
833b0a25468SMatt Arsenault;
834b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret_offset:
835b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
836b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
838b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
839b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc
840b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
841b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
842b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
843b0a25468SMatt Arsenault;
844b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret_offset:
845b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
846b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
848b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
849b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
850b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
851b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
852b0a25468SMatt Arsenault  %result = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
853b0a25468SMatt Arsenault  ret i64 %result
854b0a25468SMatt Arsenault}
855b0a25468SMatt Arsenault
; Non-returning seq_cst i64 add where both the pointer and the addend are
; inreg arguments of an amdgpu_gfx function. On all three targets the checks
; copy s[4:5] into v[2:3] (the address operand) and s[6:7] into v[0:1] (the
; data operand) with v_mov_b32, then issue flat_atomic_add_x2 without glc.
; The IR result %tmp0 is unused.
856b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_add_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
857b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret_scalar:
858b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
859b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
861b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
862b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
863b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
864b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[2:3], v[0:1]
865b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
866b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
867b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
868b0a25468SMatt Arsenault;
869b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret_scalar:
870b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
871b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
873b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
874b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
875b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
876b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[2:3], v[0:1]
877b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
878b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
879b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
880b0a25468SMatt Arsenault;
881b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret_scalar:
882b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
883b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
884b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
885b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
886b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
887b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
888b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[2:3], v[0:1]
889b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
890b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
891b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
892b0a25468SMatt Arsenault  %tmp0 = atomicrmw add ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
893b0a25468SMatt Arsenault  ret void
894b0a25468SMatt Arsenault}
895b0a25468SMatt Arsenault
; Non-returning seq_cst i64 add at %out + 4 i64 elements (32 bytes), with
; inreg pointer and addend in an amdgpu_gfx function. The GFX7 and GFX8 checks
; form the address with s_add_u32/s_addc_u32 into s[34:35] and then copy it to
; v[2:3]; the GFX9 checks copy s[4:5] unchanged and encode the displacement as
; offset:32 on the atomic. The IR result %tmp0 is unused.
896b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_add_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
897b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret_offset_scalar:
898b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
899b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
900b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
901b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
902b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
903b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
904b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
905b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
906b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[2:3], v[0:1]
907b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
908b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
909b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
910b0a25468SMatt Arsenault;
911b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret_offset_scalar:
912b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
913b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
915b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
916b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
917b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
918b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
919b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
920b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[2:3], v[0:1]
921b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
922b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
923b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
924b0a25468SMatt Arsenault;
925b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret_offset_scalar:
926b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
927b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
928b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
929b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
930b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
931b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
932b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[2:3], v[0:1] offset:32
933b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
934b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
935b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
936b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
937b0a25468SMatt Arsenault  %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
938b0a25468SMatt Arsenault  ret void
939b0a25468SMatt Arsenault}
940b0a25468SMatt Arsenault
; Returning seq_cst i64 add where both the pointer and the addend are inreg
; arguments of an amdgpu_gfx function. On all three targets the checks copy
; s[6:7] into v[0:1] and s[4:5] into v[2:3], then expect the glc form of
; flat_atomic_add_x2 with v[0:1] serving as both the data operand and the
; destination of the old value.
941b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_add_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
942b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret_scalar:
943b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
944b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
945b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
946b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
947b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
948b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
949b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc
950b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
951b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
952b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
953b0a25468SMatt Arsenault;
954b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret_scalar:
955b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
956b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
957b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
958b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
959b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
960b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
961b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc
962b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
963b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
964b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
965b0a25468SMatt Arsenault;
966b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret_scalar:
967b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
968b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
969b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
970b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
971b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
972b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
973b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc
974b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
975b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
976b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
977b0a25468SMatt Arsenault  %result = atomicrmw add ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
978b0a25468SMatt Arsenault  ret i64 %result
979b0a25468SMatt Arsenault}
980b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw add through a generic pointer, using the
; amdgpu_gfx calling convention with both arguments marked inreg. The address
; is %out advanced by four i64 elements, which shows up as the constant 32 in
; the expected output: the bonaire and tonga runs expect an explicit
; s_add_u32/s_addc_u32 on s4/s5 before the atomic, while the gfx900 run expects
; offset:32 on the flat instruction itself. All runs expect the glc form.
981b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_add_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
982b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret_offset_scalar:
983b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
984b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
986b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
987b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
988b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
989b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
990b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
991b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc
992b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
993b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
994b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
995b0a25468SMatt Arsenault;
996b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret_offset_scalar:
997b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
998b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
999b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
1000b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
1001b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
1002b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1003b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1004b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
1005b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] glc
1006b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1007b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1008b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1009b0a25468SMatt Arsenault;
1010b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret_offset_scalar:
1011b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1012b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1013b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1014b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1015b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1016b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1017b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
1018b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1019b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1020b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1021b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1022b0a25468SMatt Arsenault  %result = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1023b0a25468SMatt Arsenault  ret i64 %result
1024b0a25468SMatt Arsenault}
1025b0a25468SMatt Arsenault
; seq_cst i64 atomicrmw add whose result is never read (the function returns
; void), tagged with !amdgpu.no.remote.memory in addition to
; !noalias.addrspace. The address is %out advanced by four i64 elements (the
; constant 32 in the expected output). The bonaire and tonga runs expect a
; v_add/v_addc pair to form the address; the gfx900 run expects offset:32 on
; the instruction. No run expects the glc modifier.
1026b0a25468SMatt Arsenaultdefine void @flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
1027b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory:
1028b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1029b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1031b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1032b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3]
1033b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1034b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1035b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1036b0a25468SMatt Arsenault;
1037b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory:
1038b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1039b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1041b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1042b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3]
1043b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1044b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1045b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1046b0a25468SMatt Arsenault;
1047b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory:
1048b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1049b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1050b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[0:1], v[2:3] offset:32
1051b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1052b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1053b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1054b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1055b0a25468SMatt Arsenault  %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
1056b0a25468SMatt Arsenault  ret void
1057b0a25468SMatt Arsenault}
1058b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw add tagged with !amdgpu.no.remote.memory in
; addition to !noalias.addrspace; the loaded value is the function result.
; The address is %out advanced by four i64 elements (the constant 32 in the
; expected output). The bonaire and tonga runs expect a v_add/v_addc pair to
; form the address; the gfx900 run expects offset:32 on the instruction. Every
; run expects the glc form writing back into v[0:1].
1059b0a25468SMatt Arsenaultdefine i64 @flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
1060b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory:
1061b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1062b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1064b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1065b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc
1066b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1067b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1068b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1069b0a25468SMatt Arsenault;
1070b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory:
1071b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1072b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1074b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1075b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] glc
1076b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1077b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1078b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1079b0a25468SMatt Arsenault;
1080b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory:
1081b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1082b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1083b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_add_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
1084b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1085b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1086b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1087b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1088b0a25468SMatt Arsenault  %result = atomicrmw add ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
1089b0a25468SMatt Arsenault  ret i64 %result
1090b0a25468SMatt Arsenault}
1091b0a25468SMatt Arsenault
1092b0a25468SMatt Arsenault; ---------------------------------------------------------------------
1093b0a25468SMatt Arsenault; atomicrmw sub
1094b0a25468SMatt Arsenault; ---------------------------------------------------------------------
1095b0a25468SMatt Arsenault
; Baseline seq_cst i64 atomicrmw sub directly on %ptr (no address offset),
; with the result unused and the function returning void. All three runs
; (bonaire, tonga, gfx900) expect the same sequence: one flat_atomic_sub_x2
; without glc taking the incoming v[0:1] and v[2:3], then an s_waitcnt and
; buffer_wbinvl1_vol before the return.
1096b0a25468SMatt Arsenaultdefine void @flat_atomic_sub_i64_noret(ptr %ptr, i64 %in) {
1097b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret:
1098b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1099b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1100b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3]
1101b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1102b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1103b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1104b0a25468SMatt Arsenault;
1105b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret:
1106b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1107b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3]
1109b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1110b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1111b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1112b0a25468SMatt Arsenault;
1113b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret:
1114b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1115b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1116b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3]
1117b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1118b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1119b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1120b0a25468SMatt Arsenault  %tmp0 = atomicrmw sub ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1121b0a25468SMatt Arsenault  ret void
1122b0a25468SMatt Arsenault}
1123b0a25468SMatt Arsenault
; seq_cst i64 atomicrmw sub with the result unused, applied at %out advanced
; by four i64 elements (the constant 32 in the expected output). The bonaire
; and tonga runs expect the address to be formed with a v_add/v_addc pair
; (v_add_i32 on bonaire, v_add_u32 on tonga); the gfx900 run expects the
; offset:32 field on the flat instruction and no separate add.
1124b0a25468SMatt Arsenaultdefine void @flat_atomic_sub_i64_noret_offset(ptr %out, i64 %in) {
1125b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret_offset:
1126b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1127b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1128b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1129b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1130b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3]
1131b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1132b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1133b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1134b0a25468SMatt Arsenault;
1135b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret_offset:
1136b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1137b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1138b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1139b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1140b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3]
1141b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1142b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1143b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1144b0a25468SMatt Arsenault;
1145b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret_offset:
1146b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1147b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1148b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3] offset:32
1149b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1150b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1151b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1152b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1153b0a25468SMatt Arsenault  %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1154b0a25468SMatt Arsenault  ret void
1155b0a25468SMatt Arsenault}
1156b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw sub directly on %ptr (no address offset);
; the loaded value is the function result. All three runs (bonaire, tonga,
; gfx900) expect the same glc form of flat_atomic_sub_x2 whose destination
; v[0:1] overwrites the incoming pointer registers, followed by an s_waitcnt
; and buffer_wbinvl1_vol.
1157b0a25468SMatt Arsenaultdefine i64 @flat_atomic_sub_i64_ret(ptr %ptr, i64 %in) {
1158b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret:
1159b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1160b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1161b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc
1162b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1163b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1164b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1165b0a25468SMatt Arsenault;
1166b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret:
1167b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1168b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1169b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc
1170b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1171b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1172b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1173b0a25468SMatt Arsenault;
1174b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret:
1175b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1176b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1177b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc
1178b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1179b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1180b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1181b0a25468SMatt Arsenault  %result = atomicrmw sub ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1182b0a25468SMatt Arsenault  ret i64 %result
1183b0a25468SMatt Arsenault}
1184b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw sub applied at %out advanced by four i64
; elements (the constant 32 in the expected output); the loaded value is the
; function result. The bonaire and tonga runs expect a v_add/v_addc pair to
; form the address before the glc atomic; the gfx900 run expects the offset:32
; field on the instruction together with glc.
1185b0a25468SMatt Arsenaultdefine i64 @flat_atomic_sub_i64_ret_offset(ptr %out, i64 %in) {
1186b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret_offset:
1187b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1188b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1189b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1190b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1191b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc
1192b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1193b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1194b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1195b0a25468SMatt Arsenault;
1196b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret_offset:
1197b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1198b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1199b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1200b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1201b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc
1202b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1203b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1204b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1205b0a25468SMatt Arsenault;
1206b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret_offset:
1207b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1208b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
1210b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1211b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1212b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1213b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1214b0a25468SMatt Arsenault  %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1215b0a25468SMatt Arsenault  ret i64 %result
1216b0a25468SMatt Arsenault}
1217b0a25468SMatt Arsenault
; seq_cst i64 atomicrmw sub with the result unused, using the amdgpu_gfx
; calling convention with both arguments marked inreg. All three runs
; (bonaire, tonga, gfx900) expect the SGPR inputs s4-s7 to be copied into
; v0-v3 with v_mov_b32 before a non-glc flat_atomic_sub_x2 that takes v[2:3]
; and v[0:1] as its operands.
1218b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_sub_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
1219b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret_scalar:
1220b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1221b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1223b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1224b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1225b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
1226b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[2:3], v[0:1]
1227b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1228b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1229b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1230b0a25468SMatt Arsenault;
1231b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret_scalar:
1232b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1233b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1234b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1235b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1236b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1237b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
1238b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[2:3], v[0:1]
1239b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1240b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1241b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1242b0a25468SMatt Arsenault;
1243b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret_scalar:
1244b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1245b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1247b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1248b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1249b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1250b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[2:3], v[0:1]
1251b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1252b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1253b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1254b0a25468SMatt Arsenault  %tmp0 = atomicrmw sub ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1255b0a25468SMatt Arsenault  ret void
1256b0a25468SMatt Arsenault}
1257b0a25468SMatt Arsenault
; seq_cst i64 atomicrmw sub with the result unused, using the amdgpu_gfx
; calling convention with both arguments marked inreg, applied at %out
; advanced by four i64 elements (the constant 32 in the expected output).
; The bonaire and tonga runs expect s_add_u32/s_addc_u32 on s4/s5 into
; s34/s35 before the values are copied to VGPRs; the gfx900 run expects s4/s5
; to be copied unchanged and offset:32 on the flat instruction.
1258b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_sub_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
1259b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret_offset_scalar:
1260b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1261b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
1263b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
1264b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
1265b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1266b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1267b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
1268b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[2:3], v[0:1]
1269b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1270b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1271b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1272b0a25468SMatt Arsenault;
1273b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret_offset_scalar:
1274b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1275b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1276b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
1277b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
1278b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
1279b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1280b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1281b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
1282b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[2:3], v[0:1]
1283b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1284b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1285b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1286b0a25468SMatt Arsenault;
1287b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret_offset_scalar:
1288b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1289b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1291b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1292b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1293b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1294b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[2:3], v[0:1] offset:32
1295b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1296b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1297b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1298b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1299b0a25468SMatt Arsenault  %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1300b0a25468SMatt Arsenault  ret void
1301b0a25468SMatt Arsenault}
1302b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw sub using the amdgpu_gfx calling convention
; with both arguments marked inreg; the loaded value is the function result.
; All three runs (bonaire, tonga, gfx900) expect the SGPR inputs s4-s7 to be
; copied into v0-v3 with v_mov_b32, then the glc form of flat_atomic_sub_x2
; writing its result into v[0:1].
1303b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_sub_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
1304b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret_scalar:
1305b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1306b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1308b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1309b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1310b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
1311b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc
1312b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1313b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1314b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1315b0a25468SMatt Arsenault;
1316b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret_scalar:
1317b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1318b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1319b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1320b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1321b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1322b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
1323b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc
1324b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1325b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1326b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1327b0a25468SMatt Arsenault;
1328b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret_scalar:
1329b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1330b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1332b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1333b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1334b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1335b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc
1336b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1337b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1338b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1339b0a25468SMatt Arsenault  %result = atomicrmw sub ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1340b0a25468SMatt Arsenault  ret i64 %result
1341b0a25468SMatt Arsenault}
1342b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw sub using the amdgpu_gfx calling convention
; with both arguments marked inreg, applied at %out advanced by four i64
; elements (the constant 32 in the expected output). The bonaire and tonga
; runs expect s_add_u32/s_addc_u32 on s4/s5 into s34/s35 before the copies to
; VGPRs; the gfx900 run expects s4/s5 to be copied unchanged and offset:32 on
; the flat instruction. All runs expect the glc form.
1343b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_sub_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
1344b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret_offset_scalar:
1345b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1346b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1347b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
1348b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
1349b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
1350b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1351b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1352b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
1353b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc
1354b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1355b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1356b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1357b0a25468SMatt Arsenault;
1358b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret_offset_scalar:
1359b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1360b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
1362b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
1363b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
1364b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1365b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1366b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
1367b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] glc
1368b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1369b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1370b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1371b0a25468SMatt Arsenault;
1372b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret_offset_scalar:
1373b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1374b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1375b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1376b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1377b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1378b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1379b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
1380b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1381b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1382b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1383b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1384b0a25468SMatt Arsenault  %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1385b0a25468SMatt Arsenault  ret i64 %result
1386b0a25468SMatt Arsenault}
1387b0a25468SMatt Arsenault
; Same IR shape as flat_atomic_sub_i64_noret_offset, with
; !amdgpu.no.remote.memory added to the atomicrmw alongside
; !noalias.addrspace. The expected instructions for all three runs are the
; same as in that function: a v_add/v_addc pair forming the +32 address on
; bonaire and tonga, offset:32 on gfx900, and no glc since the result is
; unused.
1388b0a25468SMatt Arsenaultdefine void @flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
1389b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory:
1390b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1391b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1393b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1394b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3]
1395b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1396b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1397b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1398b0a25468SMatt Arsenault;
1399b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory:
1400b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1401b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1403b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1404b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3]
1405b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1406b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1407b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1408b0a25468SMatt Arsenault;
1409b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory:
1410b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1411b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1412b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[0:1], v[2:3] offset:32
1413b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1414b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1415b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1416b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1417b0a25468SMatt Arsenault  %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
1418b0a25468SMatt Arsenault  ret void
1419b0a25468SMatt Arsenault}
1420b0a25468SMatt Arsenault
; Same IR shape as flat_atomic_sub_i64_ret_offset, with
; !amdgpu.no.remote.memory added to the atomicrmw alongside
; !noalias.addrspace. The expected instructions for all three runs are the
; same as in that function: a v_add/v_addc pair forming the +32 address on
; bonaire and tonga, offset:32 on gfx900, and the glc form returning the
; loaded value in v[0:1].
1421b0a25468SMatt Arsenaultdefine i64 @flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
1422b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory:
1423b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1424b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1426b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1427b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc
1428b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1429b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1430b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1431b0a25468SMatt Arsenault;
1432b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory:
1433b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1434b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1435b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1436b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1437b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] glc
1438b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1439b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1440b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1441b0a25468SMatt Arsenault;
1442b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory:
1443b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1444b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_sub_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
1446b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1447b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1448b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1449b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1450b0a25468SMatt Arsenault  %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
1451b0a25468SMatt Arsenault  ret i64 %result
1452b0a25468SMatt Arsenault}
1453b0a25468SMatt Arsenault
1454b0a25468SMatt Arsenault; ---------------------------------------------------------------------
1455b0a25468SMatt Arsenault; atomicrmw and
1456b0a25468SMatt Arsenault; ---------------------------------------------------------------------
1457b0a25468SMatt Arsenault
; Non-returning 64-bit `atomicrmw and` (seq_cst) through the pointer passed in
; v[0:1], with the operand in v[2:3]. %tmp0 is never used, and all three
; targets are expected to emit one flat_atomic_and_x2 without the glc modifier,
; followed by s_waitcnt and buffer_wbinvl1_vol.
; NOTE(review): the !1 node is defined outside this part of the file;
; presumably it excludes the private address space (per the file name) - confirm.
1458b0a25468SMatt Arsenaultdefine void @flat_atomic_and_i64_noret(ptr %ptr, i64 %in) {
1459b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret:
1460b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1461b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1462b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3]
1463b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1464b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1465b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1466b0a25468SMatt Arsenault;
1467b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret:
1468b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1469b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1470b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3]
1471b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1472b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1473b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1474b0a25468SMatt Arsenault;
1475b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret:
1476b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1477b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1478b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3]
1479b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1480b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1481b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1482b0a25468SMatt Arsenault  %tmp0 = atomicrmw and ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1483b0a25468SMatt Arsenault  ret void
1484b0a25468SMatt Arsenault}
1485b0a25468SMatt Arsenault
; Non-returning 64-bit `atomicrmw and` at a constant offset: %gep steps %out
; forward by 4 i64 elements, i.e. 32 bytes.
; The GFX7 and GFX8 checks expect the 32 to be added to the v[0:1] address pair
; with an add / add-with-carry sequence before the atomic; the GFX9 checks
; expect it folded into the instruction as `offset:32`.
1486b0a25468SMatt Arsenaultdefine void @flat_atomic_and_i64_noret_offset(ptr %out, i64 %in) {
1487b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret_offset:
1488b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1489b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1491b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1492b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3]
1493b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1494b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1495b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1496b0a25468SMatt Arsenault;
1497b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret_offset:
1498b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1499b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1500b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1501b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1502b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3]
1503b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1504b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1505b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1506b0a25468SMatt Arsenault;
1507b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret_offset:
1508b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1509b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3] offset:32
1511b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1512b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1513b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1514b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1515b0a25468SMatt Arsenault  %tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1516b0a25468SMatt Arsenault  ret void
1517b0a25468SMatt Arsenault}
1518b0a25468SMatt Arsenault
; Returning 64-bit `atomicrmw and` (seq_cst): %result is handed back to the
; caller. All three targets are expected to emit flat_atomic_and_x2 with the
; glc modifier, writing its result into v[0:1] - the same register pair that
; supplied the address.
1519b0a25468SMatt Arsenaultdefine i64 @flat_atomic_and_i64_ret(ptr %ptr, i64 %in) {
1520b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret:
1521b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1522b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1523b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc
1524b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1525b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1526b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1527b0a25468SMatt Arsenault;
1528b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret:
1529b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1530b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1531b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc
1532b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1533b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1534b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1535b0a25468SMatt Arsenault;
1536b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret:
1537b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1538b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1539b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc
1540b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1541b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1542b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1543b0a25468SMatt Arsenault  %result = atomicrmw and ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1544b0a25468SMatt Arsenault  ret i64 %result
1545b0a25468SMatt Arsenault}
1546b0a25468SMatt Arsenault
; Returning 64-bit `atomicrmw and` at a constant offset of 4 i64 elements
; (32 bytes) from %out; %result is returned.
; The GFX7 and GFX8 checks expect the 32 to be added to the v[0:1] address pair
; before the atomic; the GFX9 checks expect `offset:32` on the instruction.
; Every target is expected to use the glc form, with the result in v[0:1].
1547b0a25468SMatt Arsenaultdefine i64 @flat_atomic_and_i64_ret_offset(ptr %out, i64 %in) {
1548b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret_offset:
1549b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1550b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1551b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1552b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1553b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc
1554b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1555b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1556b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1557b0a25468SMatt Arsenault;
1558b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret_offset:
1559b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1560b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1561b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1562b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1563b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc
1564b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1565b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1566b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1567b0a25468SMatt Arsenault;
1568b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret_offset:
1569b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1570b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1571b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
1572b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1573b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1574b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1575b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1576b0a25468SMatt Arsenault  %result = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1577b0a25468SMatt Arsenault  ret i64 %result
1578b0a25468SMatt Arsenault}
1579b0a25468SMatt Arsenault
; Non-returning 64-bit `atomicrmw and` where both arguments are `inreg` under
; the amdgpu_gfx calling convention.
; The checks expect the pointer to arrive in s[4:5] and the operand in s[6:7];
; both are copied into VGPRs (v[2:3] and v[0:1] respectively) with v_mov_b32
; before the flat_atomic_and_x2 is issued.
1580b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_and_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
1581b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret_scalar:
1582b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1583b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1584b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1585b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1586b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1587b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
1588b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[2:3], v[0:1]
1589b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1590b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1591b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1592b0a25468SMatt Arsenault;
1593b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret_scalar:
1594b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1595b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1596b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1597b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1598b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1599b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
1600b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[2:3], v[0:1]
1601b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1602b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1603b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1604b0a25468SMatt Arsenault;
1605b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret_scalar:
1606b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1607b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1609b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1610b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1611b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1612b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[2:3], v[0:1]
1613b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1614b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1615b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1616b0a25468SMatt Arsenault  %tmp0 = atomicrmw and ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1617b0a25468SMatt Arsenault  ret void
1618b0a25468SMatt Arsenault}
1619b0a25468SMatt Arsenault
; Non-returning 64-bit `atomicrmw and` with `inreg` (amdgpu_gfx) arguments at
; a constant offset of 4 i64 elements (32 bytes) from %out.
; The GFX7 and GFX8 checks expect the offset to be applied on the scalar side
; (s_add_u32 / s_addc_u32 into s[34:35]) before the address is copied into
; v[2:3]; the GFX9 checks expect s[4:5] to be copied unchanged and the offset
; carried on the instruction as `offset:32`.
1620b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_and_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
1621b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret_offset_scalar:
1622b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1623b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1624b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
1625b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
1626b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
1627b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1628b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1629b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
1630b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[2:3], v[0:1]
1631b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1632b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1633b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1634b0a25468SMatt Arsenault;
1635b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret_offset_scalar:
1636b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1637b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
1639b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
1640b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
1641b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1642b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1643b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
1644b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[2:3], v[0:1]
1645b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1646b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1647b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1648b0a25468SMatt Arsenault;
1649b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret_offset_scalar:
1650b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1651b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1652b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1653b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1654b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1655b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1656b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[2:3], v[0:1] offset:32
1657b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1658b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1659b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1660b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1661b0a25468SMatt Arsenault  %tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1662b0a25468SMatt Arsenault  ret void
1663b0a25468SMatt Arsenault}
1664b0a25468SMatt Arsenault
; Returning 64-bit `atomicrmw and` with `inreg` (amdgpu_gfx) arguments.
; The checks expect the pointer (s[4:5]) and operand (s[6:7]) to be copied
; into v[2:3] and v[0:1], and the glc form of flat_atomic_and_x2 to write the
; returned value over the operand registers v[0:1].
1665b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_and_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
1666b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret_scalar:
1667b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1668b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1669b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1670b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1671b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
1672b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
1673b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc
1674b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1675b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1676b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1677b0a25468SMatt Arsenault;
1678b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret_scalar:
1679b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1680b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1681b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1682b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1683b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
1684b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
1685b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc
1686b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1687b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1688b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1689b0a25468SMatt Arsenault;
1690b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret_scalar:
1691b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1692b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1694b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1695b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1696b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1697b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc
1698b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1699b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1700b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1701b0a25468SMatt Arsenault  %result = atomicrmw and ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1702b0a25468SMatt Arsenault  ret i64 %result
1703b0a25468SMatt Arsenault}
1704b0a25468SMatt Arsenault
; Returning 64-bit `atomicrmw and` with `inreg` (amdgpu_gfx) arguments at a
; constant offset of 4 i64 elements (32 bytes) from %out.
; The GFX7 and GFX8 checks expect the offset to be added on the scalar side
; into s[34:35] before the copy to v[2:3]; the GFX9 checks expect `offset:32`
; on the instruction. Every target is expected to use the glc form, with the
; returned value in v[0:1].
1705b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_and_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
1706b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret_offset_scalar:
1707b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1708b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1709b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
1710b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
1711b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
1712b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
1713b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
1714b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
1715b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc
1716b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1717b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1718b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1719b0a25468SMatt Arsenault;
1720b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret_offset_scalar:
1721b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1722b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1723b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
1724b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
1725b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
1726b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
1727b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
1728b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
1729b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] glc
1730b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1731b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1732b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1733b0a25468SMatt Arsenault;
1734b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret_offset_scalar:
1735b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1736b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1737b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
1738b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
1739b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
1740b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
1741b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
1742b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1743b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1744b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1745b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1746b0a25468SMatt Arsenault  %result = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1747b0a25468SMatt Arsenault  ret i64 %result
1748b0a25468SMatt Arsenault}
1749b0a25468SMatt Arsenault
; Same operation as @flat_atomic_and_i64_noret_offset, with the atomicrmw
; additionally tagged `!amdgpu.no.remote.memory !0`.
; The expected instruction sequences on all three targets are identical to
; the untagged variant, so the annotation is not expected to change codegen
; for `and` here.
; NOTE(review): the !0 and !1 nodes are defined outside this part of the file.
1750b0a25468SMatt Arsenaultdefine void @flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
1751b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory:
1752b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1753b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1754b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1755b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1756b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3]
1757b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1758b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1759b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1760b0a25468SMatt Arsenault;
1761b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory:
1762b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1763b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1765b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1766b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3]
1767b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1768b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1769b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1770b0a25468SMatt Arsenault;
1771b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory:
1772b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1773b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[0:1], v[2:3] offset:32
1775b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1776b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1777b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1778b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1779b0a25468SMatt Arsenault  %tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
1780b0a25468SMatt Arsenault  ret void
1781b0a25468SMatt Arsenault}
1782b0a25468SMatt Arsenault
; Same operation as @flat_atomic_and_i64_ret_offset, with the atomicrmw
; additionally tagged `!amdgpu.no.remote.memory !0`.
; The expected instruction sequences on all three targets are identical to
; the untagged variant: glc form, result in v[0:1], offset added explicitly
; on GFX7 and GFX8 and carried as `offset:32` on GFX9.
; NOTE(review): the !0 and !1 nodes are defined outside this part of the file.
1783b0a25468SMatt Arsenaultdefine i64 @flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
1784b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory:
1785b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1786b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1787b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
1788b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1789b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc
1790b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1791b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1792b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1793b0a25468SMatt Arsenault;
1794b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory:
1795b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1796b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1797b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
1798b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1799b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] glc
1800b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1801b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1802b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1803b0a25468SMatt Arsenault;
1804b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory:
1805b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1806b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1807b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_and_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
1808b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1809b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1810b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1811b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1812b0a25468SMatt Arsenault  %result = atomicrmw and ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
1813b0a25468SMatt Arsenault  ret i64 %result
1814b0a25468SMatt Arsenault}
1815b0a25468SMatt Arsenault
1816b0a25468SMatt Arsenault; ---------------------------------------------------------------------
1817b0a25468SMatt Arsenault; atomicrmw nand
1818b0a25468SMatt Arsenault; ---------------------------------------------------------------------
1819b0a25468SMatt Arsenault
; Non-returning 64-bit `atomicrmw nand` (seq_cst). Unlike the `and` tests,
; the expected output is not a single atomic instruction but a
; compare-and-swap retry loop (blocks %atomicrmw.start / %atomicrmw.end):
;   1. Load the current 64-bit value into v[6:7]. The GFX7 and GFX8 checks
;      expect two flat_load_dword (low half at the pointer, high half at
;      pointer + 4); the GFX9 checks expect one flat_load_dwordx2.
;   2. Each iteration computes ~(loaded & %in) per 32-bit half into v[4:5]
;      and issues flat_atomic_cmpswap_x2 with v[4:7] as its data operand
;      (new value in v[4:5], expected old value in v[6:7]).
;   3. The value returned by the cmpswap is compared with the expected one;
;      the comparison mask is OR-ed into s[4:5] and cleared from exec, and
;      the loop branches back while exec is non-zero. The returned value
;      becomes the expected value for the next attempt.
;   4. After the loop, exec is restored by OR-ing s[4:5] back in.
; NOTE(review): the !1 node is defined outside this part of the file.
1820b0a25468SMatt Arsenaultdefine void @flat_atomic_nand_i64_noret(ptr %ptr, i64 %in) {
1821b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret:
1822b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1823b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1824b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 4, v0
1825b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
1826b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[0:1]
1827b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[4:5]
1828b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1829b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB50_1: ; %atomicrmw.start
1830b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
1831b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1832b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v4, v7, v3
1833b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v8, v6, v2
1834b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v5, v4
1835b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v4, v8
1836b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
1837b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1838b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1839b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
1840b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
1841b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1842b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
1843b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1844b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB50_1
1845b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
1846b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
1847b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1848b0a25468SMatt Arsenault;
1849b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret:
1850b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1851b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 4, v0
1853b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
1854b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[0:1]
1855b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[4:5]
1856b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
1857b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB50_1: ; %atomicrmw.start
1858b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
1859b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1860b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v4, v7, v3
1861b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v8, v6, v2
1862b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v5, v4
1863b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v4, v8
1864b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
1865b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1866b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1867b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
1868b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
1869b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1870b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
1871b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1872b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB50_1
1873b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
1874b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
1875b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1876b0a25468SMatt Arsenault;
1877b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret:
1878b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1879b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1880b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
1881b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
1882b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB50_1: ; %atomicrmw.start
1883b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
1884b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1885b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v4, v7, v3
1886b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v8, v6, v2
1887b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v5, v4
1888b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v4, v8
1889b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
1890b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1891b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1892b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
1893b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
1894b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1895b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
1896b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1897b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB50_1
1898b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
1899b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
1900b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1901b0a25468SMatt Arsenault  %tmp0 = atomicrmw nand ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
1902b0a25468SMatt Arsenault  ret void
1903b0a25468SMatt Arsenault}
1904b0a25468SMatt Arsenault
; i64 `atomicrmw nand` (seq_cst) through a flat pointer with the result unused.
; The address comes from a GEP of 4 x i64, i.e. a byte offset of 32.
; All three targets are expected to emit a compare-and-swap retry loop around
; flat_atomic_cmpswap_x2, computing the new value as NOT(loaded AND %in) with
; v_and_b32 + v_not_b32 on each 32-bit half.
; The GFX7 and GFX8 checks form the +32 and +36 addresses with explicit adds
; and read the initial value with two dword loads; the GFX9 checks fold the
; offset into the instructions as offset:32 and use a single dwordx2 load.
; The metadata node !1 is defined outside this part of the file; presumably it
; excludes the private address space, as the file name suggests - confirm.
1905b0a25468SMatt Arsenaultdefine void @flat_atomic_nand_i64_noret_offset(ptr %out, i64 %in) {
1906b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret_offset:
1907b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1908b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1909b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
1910b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
1911b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
1912b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1913b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
1914b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
1915b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
1916b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB51_1: ; %atomicrmw.start
1917b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
1918b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1919b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v0, v7, v3
1920b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v1, v6, v2
1921b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v5, v0
1922b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v4, v1
1923b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
1924b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1925b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
1926b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
1927b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
1928b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1929b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
1930b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1931b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB51_1
1932b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
1933b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
1934b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1935b0a25468SMatt Arsenault;
1936b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret_offset:
1937b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
1938b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1939b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
1940b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
1941b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
1942b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
1943b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
1944b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
1945b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
1946b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB51_1: ; %atomicrmw.start
1947b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
1948b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1949b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v0, v7, v3
1950b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v1, v6, v2
1951b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v5, v0
1952b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v4, v1
1953b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
1954b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1955b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
1956b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
1957b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
1958b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1959b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
1960b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1961b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB51_1
1962b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
1963b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
1964b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1965b0a25468SMatt Arsenault;
1966b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret_offset:
1967b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
1968b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
1970b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
1971b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB51_1: ; %atomicrmw.start
1972b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
1973b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1974b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v4, v7, v3
1975b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v8, v6, v2
1976b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v5, v4
1977b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v4, v8
1978b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
1979b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1980b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
1981b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
1982b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
1983b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1984b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
1985b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1986b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB51_1
1987b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
1988b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
1989b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
1990b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
1991b0a25468SMatt Arsenault  %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
1992b0a25468SMatt Arsenault  ret void
1993b0a25468SMatt Arsenault}
1994b0a25468SMatt Arsenault
; i64 `atomicrmw nand` (seq_cst) directly on the flat pointer argument, with
; the previous memory value returned to the caller.
; All three targets are expected to emit a compare-and-swap retry loop around
; flat_atomic_cmpswap_x2, computing the new value as NOT(loaded AND %in).
; The GFX7 and GFX8 checks read the initial value as two dwords (at %ptr and
; %ptr + 4); the GFX9 checks use a single dwordx2 load.
; After the loop, the value produced by the last cmpswap (v[4:5]) is copied
; into v0/v1 ahead of the s_setpc_b64 return.
1995b0a25468SMatt Arsenaultdefine i64 @flat_atomic_nand_i64_ret(ptr %ptr, i64 %in) {
1996b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret:
1997b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
1998b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1999b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v5, vcc, 4, v0
2000b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
2001b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v4, v[0:1]
2002b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v5, v[5:6]
2003b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2004b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB52_1: ; %atomicrmw.start
2005b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
2006b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2007b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
2008b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
2009b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v4, v7, v3
2010b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v8, v6, v2
2011b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v5, v4
2012b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v4, v8
2013b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
2014b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2015b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2016b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
2017b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2018b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2019b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB52_1
2020b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
2021b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
2022b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, v4
2023b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, v5
2024b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2025b0a25468SMatt Arsenault;
2026b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret:
2027b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2028b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2029b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 4, v0
2030b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
2031b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v4, v[0:1]
2032b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v5, v[5:6]
2033b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
2034b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB52_1: ; %atomicrmw.start
2035b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2036b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2037b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
2038b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
2039b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v4, v7, v3
2040b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v8, v6, v2
2041b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v5, v4
2042b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v4, v8
2043b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
2044b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2045b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2046b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
2047b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2048b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2049b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB52_1
2050b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
2051b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
2052b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, v4
2053b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, v5
2054b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2055b0a25468SMatt Arsenault;
2056b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret:
2057b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2058b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2059b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
2060b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
2061b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB52_1: ; %atomicrmw.start
2062b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2063b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2064b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
2065b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
2066b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v4, v7, v3
2067b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v8, v6, v2
2068b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v5, v4
2069b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v4, v8
2070b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
2071b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2072b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2073b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
2074b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2075b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2076b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB52_1
2077b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
2078b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
2079b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
2080b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
2081b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2082b0a25468SMatt Arsenault  %result = atomicrmw nand ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
2083b0a25468SMatt Arsenault  ret i64 %result
2084b0a25468SMatt Arsenault}
2085b0a25468SMatt Arsenault
; i64 `atomicrmw nand` (seq_cst) through a flat pointer at a GEP of 4 x i64
; (byte offset 32), with the previous memory value returned to the caller.
; All three targets are expected to emit a compare-and-swap retry loop around
; flat_atomic_cmpswap_x2, computing the new value as NOT(loaded AND %in).
; In the GFX7 and GFX8 checks the +32 / +36 addresses are formed with explicit
; adds, and the cmpswap result already lands in v[0:1], so no copy precedes the
; return. In the GFX9 checks the offset is folded in as offset:32 and the
; result in v[4:5] is copied into v0/v1 after the loop.
2086b0a25468SMatt Arsenaultdefine i64 @flat_atomic_nand_i64_ret_offset(ptr %out, i64 %in) {
2087b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret_offset:
2088b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2089b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2090b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
2091b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
2092b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
2093b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2094b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
2095b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
2096b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2097b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB53_1: ; %atomicrmw.start
2098b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
2099b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2100b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
2101b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
2102b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v0, v9, v3
2103b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v1, v8, v2
2104b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v7, v0
2105b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v6, v1
2106b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
2107b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2108b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2109b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
2110b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2111b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2112b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB53_1
2113b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
2114b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
2115b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2116b0a25468SMatt Arsenault;
2117b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret_offset:
2118b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2119b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2120b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
2121b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
2122b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
2123b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2124b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
2125b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
2126b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
2127b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB53_1: ; %atomicrmw.start
2128b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2129b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2130b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
2131b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
2132b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v0, v9, v3
2133b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v1, v8, v2
2134b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v7, v0
2135b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v6, v1
2136b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
2137b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2138b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2139b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
2140b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2141b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2142b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB53_1
2143b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
2144b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
2145b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2146b0a25468SMatt Arsenault;
2147b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret_offset:
2148b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2149b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
2151b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
2152b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB53_1: ; %atomicrmw.start
2153b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2154b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2155b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
2156b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
2157b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v4, v7, v3
2158b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v8, v6, v2
2159b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v5, v4
2160b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v4, v8
2161b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
2162b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2163b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2164b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
2165b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2166b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2167b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB53_1
2168b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
2169b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
2170b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
2171b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
2172b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2173b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
2174b0a25468SMatt Arsenault  %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
2175b0a25468SMatt Arsenault  ret i64 %result
2176b0a25468SMatt Arsenault}
2177b0a25468SMatt Arsenault
; i64 `atomicrmw nand` (seq_cst) with the result unused, using the amdgpu_gfx
; calling convention and `inreg` arguments.
; In the checks the pointer is read from s[4:5] and %in from s[6:7]: the
; pointer is copied into VGPRs to address the flat loads and the cmpswap, while
; s6/s7 are used directly as operands of v_and_b32.
; All three targets are expected to emit a compare-and-swap retry loop around
; flat_atomic_cmpswap_x2, computing the new value as NOT(loaded AND %in), with
; s[34:35] accumulating the mask of lanes whose cmpswap succeeded.
; The GFX7 and GFX8 checks read the initial value as two dwords (the second
; address built with s_add_u32 / s_addc_u32); the GFX9 checks use one dwordx2
; load.
2178b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_nand_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
2179b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret_scalar:
2180b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2181b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2182b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2183b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
2184b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2185b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
2186b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s34
2187b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s35
2188b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[0:1]
2189b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[3:4]
2190*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s4
2191b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
2192*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s5
2193b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB54_1: ; %atomicrmw.start
2194b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
2195b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2196b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v0, s7, v3
2197b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v6, s6, v2
2198b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v1, v0
2199b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v0, v6
2200b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
2201b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2202b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2203b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
2204b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
2205b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2206b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
2207b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2208b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB54_1
2209b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
2210b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
2211b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2212b0a25468SMatt Arsenault;
2213b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret_scalar:
2214b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2215b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2216b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
2217b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
2218b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
2219b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
2220b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s34
2221b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s35
2222b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[0:1]
2223b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[3:4]
2224*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s4
2225b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
2226*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s5
2227b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB54_1: ; %atomicrmw.start
2228b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2229b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2230b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v0, s7, v3
2231b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v6, s6, v2
2232b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v1, v0
2233b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v0, v6
2234b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
2235b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2236b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2237b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
2238b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
2239b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2240b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
2241b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2242b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB54_1
2243b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
2244b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
2245b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2246b0a25468SMatt Arsenault;
2247b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret_scalar:
2248b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2249b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
2251b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
2252b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
2253*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
2254b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
2255*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
2256b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB54_1: ; %atomicrmw.start
2257b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2258b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2259b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v0, s7, v3
2260b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v6, s6, v2
2261b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v1, v0
2262b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v0, v6
2263b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
2264b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2265b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2266b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
2267b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
2268b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2269b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
2270b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2271b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB54_1
2272b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
2273b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
2274b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2275b0a25468SMatt Arsenault  %tmp0 = atomicrmw nand ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
2276b0a25468SMatt Arsenault  ret void
2277b0a25468SMatt Arsenault}
2278b0a25468SMatt Arsenault
; i64 `atomicrmw nand` (seq_cst) with the result unused, using the amdgpu_gfx
; calling convention and `inreg` arguments, at a GEP of 4 x i64 (byte
; offset 32) from the incoming pointer.
; In the checks the pointer is read from s[4:5] and %in from s[6:7]; s6/s7 are
; used directly as operands of v_and_b32.
; All three targets are expected to emit a compare-and-swap retry loop around
; flat_atomic_cmpswap_x2, computing the new value as NOT(loaded AND %in), with
; s[34:35] accumulating the mask of lanes whose cmpswap succeeded.
; The GFX7 and GFX8 checks compute the +32 and +36 addresses with s_add_u32 /
; s_addc_u32 before copying them into VGPRs; the GFX9 checks copy the
; unmodified pointer and fold the offset into the instructions as offset:32.
2279b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_nand_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
2280b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret_offset_scalar:
2281b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2282b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2283b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
2284b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
2285b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
2286b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
2287b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
2288b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
2289b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s34
2290b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v5, s35
2291b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[0:1]
2292b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[4:5]
2293*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
2294b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB55_1: ; %atomicrmw.start
2295b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
2296b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2297b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v0, s7, v3
2298b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v6, s6, v2
2299b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v1, v0
2300b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v0, v6
2301b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
2302b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2303b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2304b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
2305b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
2306*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2307b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
2308*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2309b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB55_1
2310b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
2311*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
2312b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2313b0a25468SMatt Arsenault;
2314b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret_offset_scalar:
2315b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2316b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2317b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
2318b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
2319b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
2320b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
2321b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
2322b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
2323b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s34
2324b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v5, s35
2325b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[0:1]
2326b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[4:5]
2327*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
2328b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB55_1: ; %atomicrmw.start
2329b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2330b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2331b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v0, s7, v3
2332b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v6, s6, v2
2333b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v1, v0
2334b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v0, v6
2335b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
2336b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2337b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2338b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
2339b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
2340*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2341b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
2342*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2343b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB55_1
2344b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
2345*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
2346b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2347b0a25468SMatt Arsenault;
2348b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret_offset_scalar:
2349b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2350b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2351b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
2352b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
2353b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1] offset:32
2354*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
2355b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
2356*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
2357b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB55_1: ; %atomicrmw.start
2358b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2359b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2360b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v0, s7, v3
2361b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v6, s6, v2
2362b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v1, v0
2363b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v0, v6
2364b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
2365b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2366b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2367b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
2368b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
2369b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2370b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
2371b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2372b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB55_1
2373b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
2374b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
2375b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2376b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
2377b0a25468SMatt Arsenault  %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
2378b0a25468SMatt Arsenault  ret void
2379b0a25468SMatt Arsenault}
2380b0a25468SMatt Arsenault
; Returning i64 `atomicrmw nand` (seq_cst) through a flat pointer, with both
; the pointer and the operand passed `inreg` under the amdgpu_gfx calling
; convention. In the expected output the pointer is read from s4/s5 and the
; operand from s6/s7. All three targets are expected to emit a compare-and-swap
; retry loop (flat_atomic_cmpswap_x2 inside %atomicrmw.start) rather than a
; single atomic instruction.
2381b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_nand_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
2382b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret_scalar:
2383b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2384b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
2386b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
2387b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
2388b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
2389b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
2390b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
2391b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[0:1]
2392b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[2:3]
2393*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v2, s4
2394b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
2395*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v3, s5
2396b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB56_1: ; %atomicrmw.start
2397b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
2398b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2399*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, v1
2400*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, v0
2401*eeac0ffaSNikita Popov; GFX7-NEXT:    v_and_b32_e32 v0, s7, v7
2402*eeac0ffaSNikita Popov; GFX7-NEXT:    v_and_b32_e32 v1, s6, v6
2403*eeac0ffaSNikita Popov; GFX7-NEXT:    v_not_b32_e32 v5, v0
2404*eeac0ffaSNikita Popov; GFX7-NEXT:    v_not_b32_e32 v4, v1
2405*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc
2406b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2407b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2408*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
2409b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2410b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2411b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB56_1
2412b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
2413b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
2414b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2415b0a25468SMatt Arsenault;
2416b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret_scalar:
2417b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2418b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2419b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
2420b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
2421b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
2422b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
2423b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
2424b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
2425b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[0:1]
2426b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[2:3]
2427*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2428b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
2429*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v3, s5
2430b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB56_1: ; %atomicrmw.start
2431b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2432b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2433*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, v1
2434*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, v0
2435*eeac0ffaSNikita Popov; GFX8-NEXT:    v_and_b32_e32 v0, s7, v7
2436*eeac0ffaSNikita Popov; GFX8-NEXT:    v_and_b32_e32 v1, s6, v6
2437*eeac0ffaSNikita Popov; GFX8-NEXT:    v_not_b32_e32 v5, v0
2438*eeac0ffaSNikita Popov; GFX8-NEXT:    v_not_b32_e32 v4, v1
2439*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc
2440b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2441b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2442*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
2443b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2444b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2445b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB56_1
2446b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
2447b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
2448b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2449b0a25468SMatt Arsenault;
2450b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret_scalar:
2451b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2452b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2453b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
2454b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
2455b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
2456*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
2457b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
2458*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
2459b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB56_1: ; %atomicrmw.start
2460b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2461b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2462*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, v1
2463*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, v0
2464*eeac0ffaSNikita Popov; GFX9-NEXT:    v_and_b32_e32 v0, s7, v7
2465*eeac0ffaSNikita Popov; GFX9-NEXT:    v_and_b32_e32 v1, s6, v6
2466*eeac0ffaSNikita Popov; GFX9-NEXT:    v_not_b32_e32 v5, v0
2467*eeac0ffaSNikita Popov; GFX9-NEXT:    v_not_b32_e32 v4, v1
2468*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc
2469b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2470b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2471*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
2472b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2473b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2474b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB56_1
2475b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
2476b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
2477b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2478b0a25468SMatt Arsenault  %result = atomicrmw nand ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
2479b0a25468SMatt Arsenault  ret i64 %result
2480b0a25468SMatt Arsenault}
2481b0a25468SMatt Arsenault
; Returning i64 `atomicrmw nand` (seq_cst) with `inreg` pointer and operand
; (amdgpu_gfx), applied to element 4 of %out, i.e. 32 bytes past the incoming
; pointer. The expected output for the two older targets adds 32 to the scalar
; pointer (and 36 for the high dword of the initial load), while the gfx900
; output uses `offset:32` on both the initial load and the cmpswap. All three
; expect a flat_atomic_cmpswap_x2 retry loop.
2482b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_nand_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
2483b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret_offset_scalar:
2484b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2485b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2486b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
2487b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
2488b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
2489b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
2490b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
2491b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
2492b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
2493b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
2494b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
2495b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[2:3]
2496*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
2497b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB57_1: ; %atomicrmw.start
2498b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
2499b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2500*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, v1
2501*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, v0
2502*eeac0ffaSNikita Popov; GFX7-NEXT:    v_and_b32_e32 v0, s7, v7
2503*eeac0ffaSNikita Popov; GFX7-NEXT:    v_and_b32_e32 v1, s6, v6
2504*eeac0ffaSNikita Popov; GFX7-NEXT:    v_not_b32_e32 v5, v0
2505*eeac0ffaSNikita Popov; GFX7-NEXT:    v_not_b32_e32 v4, v1
2506*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc
2507b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2508b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2509*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
2510*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2511*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2512b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB57_1
2513b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
2514*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
2515b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2516b0a25468SMatt Arsenault;
2517b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret_offset_scalar:
2518b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2519b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
2521b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
2522b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
2523b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
2524b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
2525b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
2526b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
2527b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
2528b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
2529b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[2:3]
2530*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
2531b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB57_1: ; %atomicrmw.start
2532b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2533b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2534*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, v1
2535*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, v0
2536*eeac0ffaSNikita Popov; GFX8-NEXT:    v_and_b32_e32 v0, s7, v7
2537*eeac0ffaSNikita Popov; GFX8-NEXT:    v_and_b32_e32 v1, s6, v6
2538*eeac0ffaSNikita Popov; GFX8-NEXT:    v_not_b32_e32 v5, v0
2539*eeac0ffaSNikita Popov; GFX8-NEXT:    v_not_b32_e32 v4, v1
2540*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] glc
2541b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2542b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2543*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
2544*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2545*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2546b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB57_1
2547b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
2548*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
2549b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2550b0a25468SMatt Arsenault;
2551b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret_offset_scalar:
2552b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2553b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2554b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
2555b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
2556b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] offset:32
2557*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
2558b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
2559*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
2560b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB57_1: ; %atomicrmw.start
2561b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2562b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2563*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, v1
2564*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, v0
2565*eeac0ffaSNikita Popov; GFX9-NEXT:    v_and_b32_e32 v0, s7, v7
2566*eeac0ffaSNikita Popov; GFX9-NEXT:    v_and_b32_e32 v1, s6, v6
2567*eeac0ffaSNikita Popov; GFX9-NEXT:    v_not_b32_e32 v5, v0
2568*eeac0ffaSNikita Popov; GFX9-NEXT:    v_not_b32_e32 v4, v1
2569*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[4:7] offset:32 glc
2570b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2571b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2572*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
2573b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
2574b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
2575b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB57_1
2576b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
2577b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
2578b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2579b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
2580b0a25468SMatt Arsenault  %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
2581b0a25468SMatt Arsenault  ret i64 %result
2582b0a25468SMatt Arsenault}
2583b0a25468SMatt Arsenault
; i64 `atomicrmw nand` (seq_cst) on element 4 of %out (32 bytes past the
; pointer) whose result is unused, with !amdgpu.no.remote.memory attached to
; the atomicrmw. The expected output on all three targets is still a
; flat_atomic_cmpswap_x2 retry loop; the value returned by each cmpswap is
; copied back into v[6:7] as the expected value for the next iteration.
2584b0a25468SMatt Arsenaultdefine void @flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
2585b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory:
2586b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2587b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2588b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
2589b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
2590b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
2591b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2592b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
2593b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
2594b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2595b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB58_1: ; %atomicrmw.start
2596b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
2597b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2598b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v0, v7, v3
2599b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v1, v6, v2
2600b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v5, v0
2601b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v4, v1
2602b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
2603b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2604b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2605b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
2606b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
2607b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2608b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
2609b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2610b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB58_1
2611b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
2612b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
2613b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2614b0a25468SMatt Arsenault;
2615b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory:
2616b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2617b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2618b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
2619b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
2620b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
2621b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2622b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
2623b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
2624b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
2625b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB58_1: ; %atomicrmw.start
2626b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2627b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2628b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v0, v7, v3
2629b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v1, v6, v2
2630b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v5, v0
2631b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v4, v1
2632b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
2633b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2634b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2635b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
2636b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
2637b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2638b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
2639b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2640b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB58_1
2641b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
2642b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
2643b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2644b0a25468SMatt Arsenault;
2645b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory:
2646b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2647b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2648b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
2649b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
2650b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB58_1: ; %atomicrmw.start
2651b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2652b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2653b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v4, v7, v3
2654b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v8, v6, v2
2655b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v5, v4
2656b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v4, v8
2657b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
2658b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2659b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2660b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
2661b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
2662b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2663b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
2664b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2665b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB58_1
2666b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
2667b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
2668b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2669b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
2670b0a25468SMatt Arsenault  %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
2671b0a25468SMatt Arsenault  ret void
2672b0a25468SMatt Arsenault}
2673b0a25468SMatt Arsenault
; Returning counterpart of the no-remote-memory nand test: i64
; `atomicrmw nand` (seq_cst) on element 4 of %out (32 bytes past the pointer)
; with !amdgpu.no.remote.memory attached, and the old value returned. All
; three targets are expected to emit a flat_atomic_cmpswap_x2 retry loop. In
; the gfx900 output the loop result lives in v[4:5] and is copied to v[0:1]
; after the loop, just before the return.
2674b0a25468SMatt Arsenaultdefine i64 @flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
2675b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory:
2676b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2677b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2678b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
2679b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
2680b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
2681b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2682b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
2683b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
2684b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
2685b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB59_1: ; %atomicrmw.start
2686b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
2687b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2688b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
2689b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
2690b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v0, v9, v3
2691b0a25468SMatt Arsenault; GFX7-NEXT:    v_and_b32_e32 v1, v8, v2
2692b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v7, v0
2693b0a25468SMatt Arsenault; GFX7-NEXT:    v_not_b32_e32 v6, v1
2694b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
2695b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2696b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2697b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
2698b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2699b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2700b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB59_1
2701b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
2702b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
2703b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2704b0a25468SMatt Arsenault;
2705b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory:
2706b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2707b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2708b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
2709b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
2710b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
2711b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2712b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
2713b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
2714b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
2715b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB59_1: ; %atomicrmw.start
2716b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2717b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2718b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
2719b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
2720b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v0, v9, v3
2721b0a25468SMatt Arsenault; GFX8-NEXT:    v_and_b32_e32 v1, v8, v2
2722b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v7, v0
2723b0a25468SMatt Arsenault; GFX8-NEXT:    v_not_b32_e32 v6, v1
2724b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
2725b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2726b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2727b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
2728b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2729b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2730b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB59_1
2731b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
2732b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
2733b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2734b0a25468SMatt Arsenault;
2735b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory:
2736b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2737b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2738b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
2739b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
2740b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB59_1: ; %atomicrmw.start
2741b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
2742b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2743b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
2744b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
2745b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v4, v7, v3
2746b0a25468SMatt Arsenault; GFX9-NEXT:    v_and_b32_e32 v8, v6, v2
2747b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v5, v4
2748b0a25468SMatt Arsenault; GFX9-NEXT:    v_not_b32_e32 v4, v8
2749b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
2750b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2751b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2752b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
2753b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2754b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2755b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB59_1
2756b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
2757b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
2758b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
2759b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
2760b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2761b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
2762b0a25468SMatt Arsenault  %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
2763b0a25468SMatt Arsenault  ret i64 %result
2764b0a25468SMatt Arsenault}
2765b0a25468SMatt Arsenault
2766b0a25468SMatt Arsenault; ---------------------------------------------------------------------
2767b0a25468SMatt Arsenault; atomicrmw or
2768b0a25468SMatt Arsenault; ---------------------------------------------------------------------
2769b0a25468SMatt Arsenault
; i64 `atomicrmw or` (seq_cst) directly on %ptr with the result unused. All
; three targets are expected to emit one flat_atomic_or_x2 with no loop,
; followed by s_waitcnt and buffer_wbinvl1_vol.
2770b0a25468SMatt Arsenaultdefine void @flat_atomic_or_i64_noret(ptr %ptr, i64 %in) {
2771b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret:
2772b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2773b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2774b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3]
2775b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2776b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2777b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2778b0a25468SMatt Arsenault;
2779b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret:
2780b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2781b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2782b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3]
2783b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2784b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2785b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2786b0a25468SMatt Arsenault;
2787b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret:
2788b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2789b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2790b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3]
2791b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2792b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2793b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2794b0a25468SMatt Arsenault  %tmp0 = atomicrmw or ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
2795b0a25468SMatt Arsenault  ret void
2796b0a25468SMatt Arsenault}
2797b0a25468SMatt Arsenault
; i64 `atomicrmw or` (seq_cst) on element 4 of %out (32 bytes past the
; pointer) with the result unused. The expected output is one
; flat_atomic_or_x2 per target: the two older targets first add 32 to the
; address in v[0:1], while the gfx900 output uses `offset:32` on the
; instruction itself.
2798b0a25468SMatt Arsenaultdefine void @flat_atomic_or_i64_noret_offset(ptr %out, i64 %in) {
2799b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret_offset:
2800b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2801b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2802b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
2803b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2804b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3]
2805b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2806b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2807b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2808b0a25468SMatt Arsenault;
2809b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret_offset:
2810b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2811b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2812b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
2813b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2814b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3]
2815b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2816b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2817b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2818b0a25468SMatt Arsenault;
2819b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret_offset:
2820b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2821b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2822b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3] offset:32
2823b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2824b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2825b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2826b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
2827b0a25468SMatt Arsenault  %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
2828b0a25468SMatt Arsenault  ret void
2829b0a25468SMatt Arsenault}
2830b0a25468SMatt Arsenault
; Returning i64 `atomicrmw or` (seq_cst) directly on %ptr. All three targets
; are expected to emit one flat_atomic_or_x2 in its returning form
; (destination v[0:1], `glc` set), with no loop.
2831b0a25468SMatt Arsenaultdefine i64 @flat_atomic_or_i64_ret(ptr %ptr, i64 %in) {
2832b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret:
2833b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2834b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2835b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc
2836b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2837b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2838b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2839b0a25468SMatt Arsenault;
2840b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret:
2841b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2842b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2843b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc
2844b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2845b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2846b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2847b0a25468SMatt Arsenault;
2848b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret:
2849b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2850b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2851b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc
2852b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2853b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2854b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2855b0a25468SMatt Arsenault  %result = atomicrmw or ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
2856b0a25468SMatt Arsenault  ret i64 %result
2857b0a25468SMatt Arsenault}
2858b0a25468SMatt Arsenault
; Returning i64 `atomicrmw or` (seq_cst) on element 4 of %out (32 bytes past
; the pointer). The expected output is one returning flat_atomic_or_x2 (`glc`
; set) per target: the two older targets add 32 to the address in v[0:1]
; first, while the gfx900 output uses `offset:32` on the instruction itself.
2859b0a25468SMatt Arsenaultdefine i64 @flat_atomic_or_i64_ret_offset(ptr %out, i64 %in) {
2860b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret_offset:
2861b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2862b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2863b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
2864b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2865b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc
2866b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2867b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2868b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2869b0a25468SMatt Arsenault;
2870b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret_offset:
2871b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2872b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2873b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
2874b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2875b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc
2876b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2877b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2878b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2879b0a25468SMatt Arsenault;
2880b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret_offset:
2881b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2882b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2883b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
2884b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2885b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2886b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2887b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
2888b0a25468SMatt Arsenault  %result = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
2889b0a25468SMatt Arsenault  ret i64 %result
2890b0a25468SMatt Arsenault}
2891b0a25468SMatt Arsenault
; Non-returning seq_cst `atomicrmw or` with both arguments passed `inreg`
; under the amdgpu_gfx calling convention; the result %tmp0 is unused.
; Per the checks, the pointer arrives in s[4:5] and the operand in s[6:7];
; both are copied into VGPRs (v[2:3] and v[0:1]) with v_mov_b32 before a
; single flat_atomic_or_x2 without glc. All three targets expect the same
; instruction sequence.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
2892b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_or_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
2893b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret_scalar:
2894b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2895b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2896b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
2897b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
2898b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
2899b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
2900b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[2:3], v[0:1]
2901b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2902b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2903b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2904b0a25468SMatt Arsenault;
2905b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret_scalar:
2906b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2907b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2908b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
2909b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
2910b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2911b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
2912b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[2:3], v[0:1]
2913b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2914b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2915b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2916b0a25468SMatt Arsenault;
2917b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret_scalar:
2918b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2919b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2920b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
2921b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
2922b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
2923b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
2924b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[2:3], v[0:1]
2925b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2926b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2927b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2928b0a25468SMatt Arsenault  %tmp0 = atomicrmw or ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
2929b0a25468SMatt Arsenault  ret void
2930b0a25468SMatt Arsenault}
2931b0a25468SMatt Arsenault
; Non-returning seq_cst `atomicrmw or` at %out + 4 i64 elements (32 bytes),
; with both arguments passed `inreg` under amdgpu_gfx; %tmp0 is unused.
; Expected codegen: the GFX7 and GFX8 runs add 32 to the incoming pointer
; s[4:5] on the scalar unit (s_add_u32 / s_addc_u32 into s[34:35]) and then
; copy that sum to v[2:3]; the GFX9 run copies s[4:5] unchanged and encodes
; the displacement as offset:32 on the flat atomic. The operand from s[6:7]
; is copied to v[0:1] on every target.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
2932b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_or_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
2933b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret_offset_scalar:
2934b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2935b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2936b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
2937b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
2938b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
2939b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
2940b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
2941b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
2942b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[2:3], v[0:1]
2943b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2944b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2945b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2946b0a25468SMatt Arsenault;
2947b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret_offset_scalar:
2948b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2949b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2950b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
2951b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
2952b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
2953b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
2954b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
2955b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
2956b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[2:3], v[0:1]
2957b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2958b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
2959b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2960b0a25468SMatt Arsenault;
2961b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret_offset_scalar:
2962b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
2963b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2964b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
2965b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
2966b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
2967b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
2968b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[2:3], v[0:1] offset:32
2969b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2970b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
2971b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
2972b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
2973b0a25468SMatt Arsenault  %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
2974b0a25468SMatt Arsenault  ret void
2975b0a25468SMatt Arsenault}
2976b0a25468SMatt Arsenault
; Returning seq_cst `atomicrmw or` with both arguments passed `inreg` under
; the amdgpu_gfx calling convention; the old memory value is returned.
; Per the checks, the pointer (s[4:5]) is copied to v[2:3] and the operand
; (s[6:7]) to v[0:1]; the flat atomic carries glc and names v[0:1] as its
; first operand. All three targets expect the same instruction sequence.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
2977b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_or_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
2978b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret_scalar:
2979b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
2980b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2981b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
2982b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
2983b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
2984b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
2985b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc
2986b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2987b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
2988b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2989b0a25468SMatt Arsenault;
2990b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret_scalar:
2991b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
2992b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2993b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
2994b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
2995b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
2996b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
2997b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc
2998b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
2999b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3000b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3001b0a25468SMatt Arsenault;
3002b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret_scalar:
3003b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3004b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3005b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
3006b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
3007b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
3008b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
3009b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc
3010b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3011b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3012b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3013b0a25468SMatt Arsenault  %result = atomicrmw or ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
3014b0a25468SMatt Arsenault  ret i64 %result
3015b0a25468SMatt Arsenault}
3016b0a25468SMatt Arsenault
; Returning seq_cst `atomicrmw or` at %out + 4 i64 elements (32 bytes), with
; both arguments passed `inreg` under amdgpu_gfx; the old value is returned.
; Expected codegen: the GFX7 and GFX8 runs add 32 to s[4:5] with
; s_add_u32 / s_addc_u32 (into s[34:35]) before copying the address to
; v[2:3]; the GFX9 run copies s[4:5] unchanged and uses offset:32 on the
; flat atomic. The returning form carries glc on every target.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
3017b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_or_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
3018b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret_offset_scalar:
3019b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3020b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3021b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
3022b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
3023b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
3024b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3025b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
3026b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
3027b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc
3028b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3029b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3030b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3031b0a25468SMatt Arsenault;
3032b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret_offset_scalar:
3033b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3034b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3035b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
3036b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
3037b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
3038b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
3039b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
3040b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
3041b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] glc
3042b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3043b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3044b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3045b0a25468SMatt Arsenault;
3046b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret_offset_scalar:
3047b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3048b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3049b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
3050b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
3051b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
3052b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
3053b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
3054b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3055b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3056b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3057b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3058b0a25468SMatt Arsenault  %result = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
3059b0a25468SMatt Arsenault  ret i64 %result
3060b0a25468SMatt Arsenault}
3061b0a25468SMatt Arsenault
; Non-returning seq_cst `atomicrmw or` at %out + 4 i64 elements (32 bytes),
; additionally tagged with !amdgpu.no.remote.memory; %tmp0 is unused.
; The checks expect a single-block body with one native flat_atomic_or_x2
; (no glc): the GFX7 and GFX8 runs add the 32 with v_add + v_addc, the GFX9
; run uses offset:32.
; NOTE(review): !0 and !1 are defined outside this excerpt; presumably !1
; rules out the private address space (per the file name) - confirm.
3062b0a25468SMatt Arsenaultdefine void @flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
3063b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory:
3064b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3065b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3066b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
3067b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3068b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3]
3069b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3070b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3071b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3072b0a25468SMatt Arsenault;
3073b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory:
3074b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3075b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3076b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
3077b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3078b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3]
3079b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3080b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3081b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3082b0a25468SMatt Arsenault;
3083b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory:
3084b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3085b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3086b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[0:1], v[2:3] offset:32
3087b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3088b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3089b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3090b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3091b0a25468SMatt Arsenault  %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
3092b0a25468SMatt Arsenault  ret void
3093b0a25468SMatt Arsenault}
3094b0a25468SMatt Arsenault
; Returning seq_cst `atomicrmw or` at %out + 4 i64 elements (32 bytes),
; additionally tagged with !amdgpu.no.remote.memory; the old value is
; returned.
; The expected instruction sequence is the same as the one checked for
; @flat_atomic_or_i64_ret_offset: a single native flat_atomic_or_x2 with glc,
; with the 32 added by v_add + v_addc on the GFX7 and GFX8 runs and encoded
; as offset:32 on the GFX9 run.
; NOTE(review): !0 and !1 are defined outside this excerpt; presumably !1
; rules out the private address space (per the file name) - confirm.
3095b0a25468SMatt Arsenaultdefine i64 @flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
3096b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory:
3097b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3098b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3099b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
3100b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3101b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc
3102b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3103b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3104b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3105b0a25468SMatt Arsenault;
3106b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory:
3107b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3108b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3109b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
3110b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3111b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] glc
3112b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3113b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3114b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3115b0a25468SMatt Arsenault;
3116b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory:
3117b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3118b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3119b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_or_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
3120b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3121b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3122b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3123b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3124b0a25468SMatt Arsenault  %result = atomicrmw or ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
3125b0a25468SMatt Arsenault  ret i64 %result
3126b0a25468SMatt Arsenault}
3127b0a25468SMatt Arsenault
3128b0a25468SMatt Arsenault; ---------------------------------------------------------------------
3129b0a25468SMatt Arsenault; atomicrmw xor
3130b0a25468SMatt Arsenault; ---------------------------------------------------------------------
3131b0a25468SMatt Arsenault
; Non-returning seq_cst `atomicrmw xor` of i64 %in at flat pointer %ptr;
; the result %tmp0 is unused.
; All three targets expect the same body: one flat_atomic_xor_x2 with the
; address in v[0:1] and the operand in v[2:3] (no glc), then s_waitcnt and
; buffer_wbinvl1_vol before the return.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
3132b0a25468SMatt Arsenaultdefine void @flat_atomic_xor_i64_noret(ptr %ptr, i64 %in) {
3133b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret:
3134b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3135b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3136b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3]
3137b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3138b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3139b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3140b0a25468SMatt Arsenault;
3141b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret:
3142b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3143b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3144b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3]
3145b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3146b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3147b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3148b0a25468SMatt Arsenault;
3149b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret:
3150b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3151b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3152b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3]
3153b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3154b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3155b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3156b0a25468SMatt Arsenault  %tmp0 = atomicrmw xor ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
3157b0a25468SMatt Arsenault  ret void
3158b0a25468SMatt Arsenault}
3159b0a25468SMatt Arsenault
; Non-returning seq_cst `atomicrmw xor` at %out + 4 i64 elements (32 bytes);
; the result %tmp0 is unused.
; Expected codegen: the GFX7 and GFX8 runs add the 32 to v[0:1] with
; v_add + v_addc (carry through vcc) before the flat atomic, while the GFX9
; run keeps the pointer as-is and encodes offset:32 on the instruction.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
3160b0a25468SMatt Arsenaultdefine void @flat_atomic_xor_i64_noret_offset(ptr %out, i64 %in) {
3161b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret_offset:
3162b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3163b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3164b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
3165b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3166b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3]
3167b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3168b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3169b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3170b0a25468SMatt Arsenault;
3171b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret_offset:
3172b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3173b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3174b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
3175b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3176b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3]
3177b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3178b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3179b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3180b0a25468SMatt Arsenault;
3181b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret_offset:
3182b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3183b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3184b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3] offset:32
3185b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3186b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3187b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3188b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3189b0a25468SMatt Arsenault  %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
3190b0a25468SMatt Arsenault  ret void
3191b0a25468SMatt Arsenault}
3192b0a25468SMatt Arsenault
; Returning seq_cst `atomicrmw xor` of i64 %in at flat pointer %ptr; the old
; memory value is returned.
; All three targets expect the same body: one flat_atomic_xor_x2 with glc
; whose first operand is v[0:1], then s_waitcnt and buffer_wbinvl1_vol
; before the return.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
3193b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xor_i64_ret(ptr %ptr, i64 %in) {
3194b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret:
3195b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3196b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3197b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc
3198b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3199b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3200b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3201b0a25468SMatt Arsenault;
3202b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret:
3203b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3204b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3205b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc
3206b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3207b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3208b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3209b0a25468SMatt Arsenault;
3210b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret:
3211b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3212b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3213b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc
3214b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3215b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3216b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3217b0a25468SMatt Arsenault  %result = atomicrmw xor ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
3218b0a25468SMatt Arsenault  ret i64 %result
3219b0a25468SMatt Arsenault}
3220b0a25468SMatt Arsenault
; Returning seq_cst `atomicrmw xor` at %out + 4 i64 elements (32 bytes); the
; old memory value is returned.
; Expected codegen: the GFX7 and GFX8 runs add the 32 to v[0:1] with
; v_add + v_addc (carry through vcc), while the GFX9 run encodes offset:32
; on the flat atomic. The returning form carries glc on every target.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
3221b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xor_i64_ret_offset(ptr %out, i64 %in) {
3222b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret_offset:
3223b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3224b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3225b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
3226b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3227b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc
3228b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3229b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3230b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3231b0a25468SMatt Arsenault;
3232b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret_offset:
3233b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3234b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3235b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
3236b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3237b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc
3238b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3239b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3240b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3241b0a25468SMatt Arsenault;
3242b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret_offset:
3243b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3244b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3245b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
3246b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3247b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3248b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3249b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3250b0a25468SMatt Arsenault  %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
3251b0a25468SMatt Arsenault  ret i64 %result
3252b0a25468SMatt Arsenault}
3253b0a25468SMatt Arsenault
; Non-returning seq_cst `atomicrmw xor` with both arguments passed `inreg`
; under the amdgpu_gfx calling convention; the result %tmp0 is unused.
; Per the checks, the pointer arrives in s[4:5] and the operand in s[6:7];
; both are copied into VGPRs (v[2:3] and v[0:1]) with v_mov_b32 before a
; single flat_atomic_xor_x2 without glc. All three targets expect the same
; instruction sequence.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
3254b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xor_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
3255b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret_scalar:
3256b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3257b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3258b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3259b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
3260b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
3261b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
3262b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[2:3], v[0:1]
3263b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3264b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3265b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3266b0a25468SMatt Arsenault;
3267b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret_scalar:
3268b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3269b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3270b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
3271b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
3272b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
3273b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
3274b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[2:3], v[0:1]
3275b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3276b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3277b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3278b0a25468SMatt Arsenault;
3279b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret_scalar:
3280b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3281b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3282b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
3283b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
3284b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
3285b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
3286b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[2:3], v[0:1]
3287b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3288b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3289b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3290b0a25468SMatt Arsenault  %tmp0 = atomicrmw xor ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
3291b0a25468SMatt Arsenault  ret void
3292b0a25468SMatt Arsenault}
3293b0a25468SMatt Arsenault
; Non-returning seq_cst `atomicrmw xor` at %out + 4 i64 elements (32 bytes),
; with both arguments passed `inreg` under amdgpu_gfx; %tmp0 is unused.
; Expected codegen: the GFX7 and GFX8 runs add 32 to the incoming pointer
; s[4:5] on the scalar unit (s_add_u32 / s_addc_u32 into s[34:35]) and then
; copy that sum to v[2:3]; the GFX9 run copies s[4:5] unchanged and encodes
; the displacement as offset:32 on the flat atomic. The operand from s[6:7]
; is copied to v[0:1] on every target.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
3294b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_xor_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
3295b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret_offset_scalar:
3296b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3297b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3298b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
3299b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
3300b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
3301b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3302b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
3303b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
3304b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[2:3], v[0:1]
3305b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3306b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3307b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3308b0a25468SMatt Arsenault;
3309b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret_offset_scalar:
3310b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3311b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3312b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
3313b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
3314b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
3315b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
3316b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
3317b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
3318b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[2:3], v[0:1]
3319b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3320b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3321b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3322b0a25468SMatt Arsenault;
3323b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret_offset_scalar:
3324b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3325b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3326b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
3327b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
3328b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
3329b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
3330b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[2:3], v[0:1] offset:32
3331b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3332b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3333b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3334b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3335b0a25468SMatt Arsenault  %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
3336b0a25468SMatt Arsenault  ret void
3337b0a25468SMatt Arsenault}
3338b0a25468SMatt Arsenault
; Returning seq_cst `atomicrmw xor` with both arguments passed `inreg` under
; the amdgpu_gfx calling convention; the old memory value is returned.
; Per the checks, the pointer (s[4:5]) is copied to v[2:3] and the operand
; (s[6:7]) to v[0:1]; the flat atomic carries glc and names v[0:1] as its
; first operand. All three targets expect the same instruction sequence.
; NOTE(review): !1 is defined outside this excerpt; presumably it rules out
; the private address space (per the file name) - confirm.
3339b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_xor_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
3340b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret_scalar:
3341b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3342b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3343b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3344b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
3345b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
3346b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
3347b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc
3348b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3349b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3350b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3351b0a25468SMatt Arsenault;
3352b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret_scalar:
3353b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3354b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3355b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
3356b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
3357b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
3358b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
3359b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc
3360b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3361b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3362b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3363b0a25468SMatt Arsenault;
3364b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret_scalar:
3365b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3366b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3367b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
3368b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
3369b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
3370b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
3371b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc
3372b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3373b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3374b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3375b0a25468SMatt Arsenault  %result = atomicrmw xor ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
3376b0a25468SMatt Arsenault  ret i64 %result
3377b0a25468SMatt Arsenault}
3378b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw xor on element 4 of %out (i64 elements, so a
; 32-byte offset), with inreg arguments under the amdgpu_gfx calling
; convention. The GFX7 and GFX8 checks expect the offset to be added with
; s_add_u32/s_addc_u32 before the copy to VGPRs, while the GFX9 checks expect
; it folded into the instruction as offset:32.
; NOTE(review): !1 is defined outside this chunk; given the file name it
; presumably excludes the private address space - confirm.
3379b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_xor_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
3380b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret_offset_scalar:
3381b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3382b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3383b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
3384b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
3385b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
3386b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
3387b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
3388b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
3389b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc
3390b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3391b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3392b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3393b0a25468SMatt Arsenault;
3394b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret_offset_scalar:
3395b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3396b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3397b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
3398b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
3399b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
3400b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
3401b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
3402b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
3403b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] glc
3404b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3405b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3406b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3407b0a25468SMatt Arsenault;
3408b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret_offset_scalar:
3409b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3410b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3411b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
3412b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
3413b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
3414b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
3415b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
3416b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3417b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3418b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3419b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3420b0a25468SMatt Arsenault  %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
3421b0a25468SMatt Arsenault  ret i64 %result
3422b0a25468SMatt Arsenault}
3423b0a25468SMatt Arsenault
; Seq_cst i64 atomicrmw xor on element 4 of %out (32-byte offset) whose result
; is unused, tagged with !amdgpu.no.remote.memory in addition to
; !noalias.addrspace. The checks expect one flat_atomic_xor_x2 without glc on
; every target; the GFX7 and GFX8 checks add 32 to the address in VGPRs first,
; and the GFX9 checks use offset:32 instead.
; NOTE(review): !0 and !1 are defined outside this chunk - confirm their
; contents there.
3424b0a25468SMatt Arsenaultdefine void @flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
3425b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory:
3426b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3427b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3428b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
3429b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3430b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3]
3431b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3432b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3433b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3434b0a25468SMatt Arsenault;
3435b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory:
3436b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3437b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3438b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
3439b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3440b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3]
3441b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3442b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3443b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3444b0a25468SMatt Arsenault;
3445b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory:
3446b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3447b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3448b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[0:1], v[2:3] offset:32
3449b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3450b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3451b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3452b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3453b0a25468SMatt Arsenault  %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
3454b0a25468SMatt Arsenault  ret void
3455b0a25468SMatt Arsenault}
3456b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw xor on element 4 of %out (32-byte offset),
; tagged with !amdgpu.no.remote.memory in addition to !noalias.addrspace. The
; checks expect one glc-flagged flat_atomic_xor_x2 writing v[0:1] on every
; target; the GFX7 and GFX8 checks add 32 to the address in VGPRs first, and
; the GFX9 checks use offset:32 instead.
; NOTE(review): !0 and !1 are defined outside this chunk - confirm their
; contents there.
3457b0a25468SMatt Arsenaultdefine i64 @flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
3458b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory:
3459b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3460b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3461b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
3462b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3463b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc
3464b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3465b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3466b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3467b0a25468SMatt Arsenault;
3468b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory:
3469b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3470b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3471b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
3472b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3473b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] glc
3474b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3475b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3476b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3477b0a25468SMatt Arsenault;
3478b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory:
3479b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3480b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3481b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_xor_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
3482b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3483b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3484b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3485b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3486b0a25468SMatt Arsenault  %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
3487b0a25468SMatt Arsenault  ret i64 %result
3488b0a25468SMatt Arsenault}
3489b0a25468SMatt Arsenault
3490b0a25468SMatt Arsenault; ---------------------------------------------------------------------
3491b0a25468SMatt Arsenault; atomicrmw max
3492b0a25468SMatt Arsenault; ---------------------------------------------------------------------
3493b0a25468SMatt Arsenault
; Seq_cst i64 atomicrmw max directly on %ptr whose result is unused. Unlike the
; xor tests, the checks on all three targets expect a compare-and-swap loop
; (%atomicrmw.start): a signed v_cmp_gt_i64 plus two v_cndmask_b32 select the
; new value, flat_atomic_cmpswap_x2 attempts the store, and the loop repeats
; until the returned value equals the expected one. The GFX7 and GFX8 checks
; load the initial value as two dwords (the second at +4); the GFX9 checks use
; one flat_load_dwordx2.
; NOTE(review): !1 is defined outside this chunk; given the file name it
; presumably excludes the private address space - confirm.
3494b0a25468SMatt Arsenaultdefine void @flat_atomic_max_i64_noret(ptr %ptr, i64 %in) {
3495b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret:
3496b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3497b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3498b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 4, v0
3499b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
3500b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[0:1]
3501b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[4:5]
3502b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3503b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB80_1: ; %atomicrmw.start
3504b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
3505b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3506b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3507b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3508b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3509b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
3510b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3511b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3512b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
3513b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
3514b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3515b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
3516b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3517b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB80_1
3518b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
3519b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
3520b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3521b0a25468SMatt Arsenault;
3522b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret:
3523b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3524b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3525b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 4, v0
3526b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
3527b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[0:1]
3528b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[4:5]
3529b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
3530b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB80_1: ; %atomicrmw.start
3531b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
3532b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3533b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3534b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3535b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3536b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
3537b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3538b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3539b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
3540b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
3541b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3542b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
3543b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3544b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB80_1
3545b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
3546b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
3547b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3548b0a25468SMatt Arsenault;
3549b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret:
3550b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3551b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3552b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
3553b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
3554b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB80_1: ; %atomicrmw.start
3555b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
3556b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3557b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3558b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3559b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3560b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
3561b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3562b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3563b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
3564b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
3565b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3566b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
3567b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3568b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB80_1
3569b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
3570b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
3571b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3572b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
3573b0a25468SMatt Arsenault  ret void
3574b0a25468SMatt Arsenault}
3575b0a25468SMatt Arsenault
; Seq_cst i64 atomicrmw max on element 4 of %out (32-byte offset) whose result
; is unused. The checks expect a flat_atomic_cmpswap_x2 loop on every target.
; The GFX7 and GFX8 checks compute the +32 and +36 addresses with VALU adds and
; load the initial value as two dwords; the GFX9 checks fold the offset into
; both the flat_load_dwordx2 and the cmpswap as offset:32.
; NOTE(review): !1 is defined outside this chunk; given the file name it
; presumably excludes the private address space - confirm.
3576b0a25468SMatt Arsenaultdefine void @flat_atomic_max_i64_noret_offset(ptr %out, i64 %in) {
3577b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret_offset:
3578b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3579b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3580b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
3581b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
3582b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
3583b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3584b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
3585b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
3586b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3587b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB81_1: ; %atomicrmw.start
3588b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
3589b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3590b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3591b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3592b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3593b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
3594b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3595b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3596b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
3597b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
3598b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3599b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
3600b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3601b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB81_1
3602b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
3603b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
3604b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3605b0a25468SMatt Arsenault;
3606b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret_offset:
3607b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3608b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3609b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
3610b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
3611b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
3612b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3613b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
3614b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
3615b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
3616b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB81_1: ; %atomicrmw.start
3617b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
3618b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3619b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3620b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3621b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3622b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
3623b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3624b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3625b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
3626b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
3627b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3628b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
3629b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3630b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB81_1
3631b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
3632b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
3633b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3634b0a25468SMatt Arsenault;
3635b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret_offset:
3636b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3637b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3638b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
3639b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
3640b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB81_1: ; %atomicrmw.start
3641b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
3642b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3643b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3644b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3645b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3646b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
3647b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3648b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3649b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
3650b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
3651b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3652b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
3653b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3654b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB81_1
3655b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
3656b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
3657b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3658b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3659b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
3660b0a25468SMatt Arsenault  ret void
3661b0a25468SMatt Arsenault}
3662b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw max directly on %ptr. The checks expect a
; flat_atomic_cmpswap_x2 loop on every target. Each iteration first copies the
; last observed value from v[4:5] into v[6:7], and after the loop the checks
; expect v4/v5 to be moved into the v0/v1 return registers.
; NOTE(review): !1 is defined outside this chunk; given the file name it
; presumably excludes the private address space - confirm.
3663b0a25468SMatt Arsenaultdefine i64 @flat_atomic_max_i64_ret(ptr %ptr, i64 %in) {
3664b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret:
3665b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3666b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3667b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v5, vcc, 4, v0
3668b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
3669b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v4, v[0:1]
3670b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v5, v[5:6]
3671b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3672b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB82_1: ; %atomicrmw.start
3673b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
3674b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3675b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
3676b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
3677b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3678b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3679b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3680b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
3681b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3682b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3683b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
3684b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3685b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3686b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB82_1
3687b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
3688b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
3689b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, v4
3690b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, v5
3691b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3692b0a25468SMatt Arsenault;
3693b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret:
3694b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3695b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3696b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 4, v0
3697b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
3698b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v4, v[0:1]
3699b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v5, v[5:6]
3700b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
3701b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB82_1: ; %atomicrmw.start
3702b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
3703b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3704b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
3705b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
3706b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3707b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3708b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3709b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
3710b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3711b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3712b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
3713b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3714b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3715b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB82_1
3716b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
3717b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
3718b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, v4
3719b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, v5
3720b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3721b0a25468SMatt Arsenault;
3722b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_ret:
3723b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3724b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3725b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
3726b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
3727b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB82_1: ; %atomicrmw.start
3728b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
3729b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3730b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
3731b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
3732b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3733b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3734b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3735b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
3736b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3737b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3738b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
3739b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3740b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3741b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB82_1
3742b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
3743b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
3744b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
3745b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
3746b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3747b0a25468SMatt Arsenault  %result = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
3748b0a25468SMatt Arsenault  ret i64 %result
3749b0a25468SMatt Arsenault}
3750b0a25468SMatt Arsenault
; Returning seq_cst i64 atomicrmw max on element 4 of %out (32-byte offset).
; The checks expect a flat_atomic_cmpswap_x2 loop on every target. In the GFX7
; and GFX8 checks the offset address is kept in v[4:5] and the cmpswap writes
; v[0:1] directly, so no copies follow the loop. In the GFX9 checks the cmpswap
; uses offset:32 and writes v[4:5], which is then moved into v0/v1.
; NOTE(review): !1 is defined outside this chunk; given the file name it
; presumably excludes the private address space - confirm.
3751b0a25468SMatt Arsenaultdefine i64 @flat_atomic_max_i64_ret_offset(ptr %out, i64 %in) {
3752b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret_offset:
3753b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3754b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3755b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
3756b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
3757b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
3758b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3759b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
3760b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
3761b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
3762b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB83_1: ; %atomicrmw.start
3763b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
3764b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3765b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
3766b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
3767b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3]
3768b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
3769b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
3770b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
3771b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3772b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3773b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
3774b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3775b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3776b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB83_1
3777b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
3778b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
3779b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3780b0a25468SMatt Arsenault;
3781b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret_offset:
3782b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3783b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3784b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
3785b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
3786b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
3787b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
3788b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
3789b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
3790b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
3791b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB83_1: ; %atomicrmw.start
3792b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
3793b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3794b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
3795b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
3796b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3]
3797b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
3798b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
3799b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
3800b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3801b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3802b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
3803b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3804b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3805b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB83_1
3806b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
3807b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
3808b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3809b0a25468SMatt Arsenault;
3810b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_ret_offset:
3811b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3812b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3813b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
3814b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
3815b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB83_1: ; %atomicrmw.start
3816b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
3817b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3818b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
3819b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
3820b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
3821b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
3822b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
3823b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
3824b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3825b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3826b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
3827b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
3828b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
3829b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB83_1
3830b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
3831b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
3832b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
3833b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
3834b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3835b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
3836b0a25468SMatt Arsenault  %result = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
3837b0a25468SMatt Arsenault  ret i64 %result
3838b0a25468SMatt Arsenault}
3839b0a25468SMatt Arsenault
; Test: signed 64-bit `atomicrmw max` (seq_cst) on a flat pointer whose result
; is unused. The function uses the amdgpu_gfx calling convention with both
; arguments marked inreg; in the expected code the pointer is taken from
; s[4:5] and the operand from s[6:7].
; For all three RUN lines (bonaire, tonga, gfx900) the expected output is a
; compare-and-swap retry loop: load the current value, pick the larger of it
; and the operand with v_cmp_lt_i64 + v_cndmask, then flat_atomic_cmpswap_x2
; and repeat until the returned value equals the value that was compared.
; bonaire and tonga load the initial value as two dwords (ptr and ptr+4);
; gfx900 uses a single flat_load_dwordx2.
; NOTE(review): !1 is defined outside this part of the file; going by the file
; name it presumably excludes the private address space - confirm.
; The assertions below are autogenerated (see the note on the first line of
; the file); regenerate them with the update script instead of editing by hand.
3840b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_max_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
3841b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret_scalar:
3842b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3843b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3844b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
3845b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
3846b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
3847b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
3848b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s34
3849b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s35
3850b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[0:1]
3851b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[3:4]
3852*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s4
3853b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
3854*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s7
3855*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s6
3856*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s5
3857b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB84_1: ; %atomicrmw.start
3858b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
3859b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3860b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
3861*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
3862*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
3863b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
3864b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3865b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3866b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
3867b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
3868b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
3869b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
3870b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
3871b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB84_1
3872b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
3873b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
3874b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3875b0a25468SMatt Arsenault;
3876b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret_scalar:
3877b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3878b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3879b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
3880b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
3881b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
3882b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
3883b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s34
3884b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s35
3885b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[0:1]
3886b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[3:4]
3887*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s4
3888b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
3889*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s7
3890*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s6
3891*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s5
3892b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB84_1: ; %atomicrmw.start
3893b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
3894b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3895b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
3896*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
3897*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
3898b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
3899b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3900b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
3901b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
3902b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
3903b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
3904b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
3905b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
3906b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB84_1
3907b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
3908b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
3909b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
3910b0a25468SMatt Arsenault;
3911b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret_scalar:
3912b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
3913b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3914b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
3915b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
3916b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
3917*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
3918b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
3919*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s7
3920*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s6
3921*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
3922b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB84_1: ; %atomicrmw.start
3923b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
3924b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3925b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
3926*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
3927*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
3928b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
3929b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3930b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
3931b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
3932b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
3933b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
3934b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
3935b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
3936b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB84_1
3937b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
3938b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
3939b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
3940b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
3941b0a25468SMatt Arsenault  ret void
3942b0a25468SMatt Arsenault}
3943b0a25468SMatt Arsenault
; Test: signed 64-bit `atomicrmw max` (seq_cst) with an unused result, applied
; to element 4 of an i64 array (getelementptr i64 ... 4, i.e. 32 bytes past
; %out). amdgpu_gfx calling convention, both arguments inreg; the expected code
; takes the base pointer from s[4:5] and the operand from s[6:7].
; Expected output is a compare-and-swap retry loop around
; flat_atomic_cmpswap_x2 for every RUN line. The runs differ in how the
; 32-byte displacement is applied:
;   - bonaire and tonga add 32 (low dword) and 36 (high dword) to the pointer
;     with s_add_u32/s_addc_u32 and use the precomputed address in the loop;
;   - gfx900 keeps the base pointer and puts `offset:32` on the load and on the
;     cmpswap instruction.
; NOTE(review): !1 is defined outside this part of the file; going by the file
; name it presumably excludes the private address space - confirm.
; The assertions below are autogenerated (see the note on the first line of
; the file); regenerate them with the update script instead of editing by hand.
3944b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
3945b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret_offset_scalar:
3946b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
3947b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3948b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
3949b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
3950b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
3951b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
3952b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
3953b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
3954b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s34
3955b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v5, s35
3956b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[0:1]
3957b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[4:5]
3958*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
3959*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s7
3960*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s6
3961b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB85_1: ; %atomicrmw.start
3962b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
3963b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3964b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
3965*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
3966*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
3967b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
3968b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3969b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
3970b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
3971b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
3972*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
3973b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
3974*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
3975b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB85_1
3976b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
3977*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
3978b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
3979b0a25468SMatt Arsenault;
3980b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret_offset_scalar:
3981b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
3982b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3983b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
3984b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
3985b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
3986b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
3987b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
3988b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
3989b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s34
3990b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v5, s35
3991b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[0:1]
3992b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[4:5]
3993*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
3994*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s7
3995*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s6
3996b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB85_1: ; %atomicrmw.start
3997b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
3998b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
3999b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
4000*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
4001*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
4002b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
4003b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4004b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4005b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
4006b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
4007*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
4008b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
4009*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
4010b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB85_1
4011b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4012*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
4013b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
4014b0a25468SMatt Arsenault;
4015b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret_offset_scalar:
4016b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
4017b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4018b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
4019b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
4020b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1] offset:32
4021*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
4022b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
4023*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s7
4024*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s6
4025*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
4026b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB85_1: ; %atomicrmw.start
4027b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4028b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4029b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[2:3]
4030*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
4031*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
4032b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
4033b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4034b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4035b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
4036b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
4037b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
4038b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
4039b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
4040b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB85_1
4041b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4042b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
4043b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
4044b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
4045b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
4046b0a25468SMatt Arsenault  ret void
4047b0a25468SMatt Arsenault}
4048b0a25468SMatt Arsenault
; Test: signed 64-bit `atomicrmw max` (seq_cst) on a flat pointer where the
; previous memory value is returned. amdgpu_gfx calling convention, both
; arguments inreg; the expected code takes the pointer from s[4:5] and the
; operand from s[6:7].
; Expected output is a compare-and-swap retry loop for every RUN line. Because
; the old value is returned, each iteration first copies the current value
; from v[0:1] into v[8:9] (the compare operand), so the cmpswap can write its
; result straight back into v[0:1]; that register pair still holds the result
; when s_setpc_b64 returns.
; bonaire and tonga load the initial value as two dwords (ptr and ptr+4);
; gfx900 uses a single flat_load_dwordx2.
; NOTE(review): !1 is defined outside this part of the file; going by the file
; name it presumably excludes the private address space - confirm.
; The assertions below are autogenerated (see the note on the first line of
; the file); regenerate them with the update script instead of editing by hand.
4049b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_max_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
4050b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret_scalar:
4051b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
4052b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4053b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
4054b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
4055b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
4056b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
4057b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
4058b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
4059b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[0:1]
4060b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[2:3]
4061*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v2, s4
4062b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
4063*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s7
4064*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s6
4065*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v3, s5
4066b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB86_1: ; %atomicrmw.start
4067b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4068b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4069*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v1
4070*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v0
4071*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
4072*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4073*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4074*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
4075b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4076b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4077*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
4078b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
4079b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
4080b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB86_1
4081b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4082b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
4083b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
4084b0a25468SMatt Arsenault;
4085b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret_scalar:
4086b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
4087b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4088b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
4089b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
4090b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
4091b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
4092b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
4093b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
4094b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[0:1]
4095b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[2:3]
4096*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, s4
4097b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
4098*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s7
4099*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s6
4100*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v3, s5
4101b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB86_1: ; %atomicrmw.start
4102b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4103b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4104*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v1
4105*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v0
4106*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
4107*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4108*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4109*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
4110b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4111b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4112*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
4113b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
4114b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
4115b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB86_1
4116b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4117b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
4118b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
4119b0a25468SMatt Arsenault;
4120b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_ret_scalar:
4121b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
4122b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4123b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
4124b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
4125b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
4126*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
4127b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
4128*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s7
4129*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s6
4130*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
4131b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB86_1: ; %atomicrmw.start
4132b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4133b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4134*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v1
4135*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v0
4136*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
4137*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4138*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4139*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
4140b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4141b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4142*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
4143b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
4144b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
4145b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB86_1
4146b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4147b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
4148b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
4149b0a25468SMatt Arsenault  %result = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
4150b0a25468SMatt Arsenault  ret i64 %result
4151b0a25468SMatt Arsenault}
4152b0a25468SMatt Arsenault
; Test: signed 64-bit `atomicrmw max` (seq_cst) returning the previous memory
; value, applied to element 4 of an i64 array (getelementptr i64 ... 4, i.e.
; 32 bytes past %out). amdgpu_gfx calling convention, both arguments inreg;
; the expected code takes the base pointer from s[4:5] and the operand from
; s[6:7].
; Expected output is a compare-and-swap retry loop for every RUN line, with
; the current value copied from v[0:1] into v[8:9] at the top of each
; iteration so that the cmpswap result lands in v[0:1], which is still live at
; s_setpc_b64. The runs differ in how the 32-byte displacement is applied:
;   - bonaire and tonga add 32 (low dword) and 36 (high dword) to the pointer
;     with s_add_u32/s_addc_u32 and use the precomputed address in the loop;
;   - gfx900 keeps the base pointer and puts `offset:32` on the load and on the
;     cmpswap instruction.
; NOTE(review): !1 is defined outside this part of the file; going by the file
; name it presumably excludes the private address space - confirm.
; The assertions below are autogenerated (see the note on the first line of
; the file); regenerate them with the update script instead of editing by hand.
4153b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
4154b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret_offset_scalar:
4155b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
4156b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4157b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
4158b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
4159b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
4160b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
4161b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
4162b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
4163b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
4164b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
4165b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
4166b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[2:3]
4167*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
4168*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s7
4169*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s6
4170b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB87_1: ; %atomicrmw.start
4171b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4172b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4173*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v1
4174*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v0
4175*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
4176*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4177*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4178*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
4179b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4180b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4181*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
4182*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
4183*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
4184b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB87_1
4185b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4186*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
4187b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
4188b0a25468SMatt Arsenault;
4189b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret_offset_scalar:
4190b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
4191b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4192b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
4193b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
4194b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
4195b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
4196b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
4197b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
4198b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
4199b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
4200b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
4201b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[2:3]
4202*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
4203*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s7
4204*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s6
4205b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB87_1: ; %atomicrmw.start
4206b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4207b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4208*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v1
4209*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v0
4210*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
4211*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4212*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4213*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
4214b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4215b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4216*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
4217*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
4218*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
4219b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB87_1
4220b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4221*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
4222b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
4223b0a25468SMatt Arsenault;
4224b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_ret_offset_scalar:
4225b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
4226b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4227b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
4228b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
4229b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] offset:32
4230*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
4231b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
4232*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s7
4233*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s6
4234*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
4235b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB87_1: ; %atomicrmw.start
4236b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4237b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4238*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v1
4239*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v0
4240*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[6:7], v[8:9]
4241*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4242*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4243*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc
4244b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4245b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4246*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
4247b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
4248b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
4249b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB87_1
4250b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4251b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
4252b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
4253b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
4254b0a25468SMatt Arsenault  %result = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
4255b0a25468SMatt Arsenault  ret i64 %result
4256b0a25468SMatt Arsenault}
4257b0a25468SMatt Arsenault
; Kernel test: `atomicrmw max ... seq_cst` on the i64 located at
; %out[%index] plus 4 further i64 elements (two chained GEPs). The returned
; old value (%tmp0) is unused.
; The autogenerated checks below expect a compare-and-swap retry loop on every
; target: an initial flat_load_dwordx2, then flat_atomic_cmpswap_x2 ... glc
; repeated via s_cbranch_execnz until the compare succeeds.
; The GFX7 and GFX8 checks add the 32-byte offset with s_add_u32/s_addc_u32,
; while the GFX9 checks carry it as offset:32 on the load and the cmpswap.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view; presumably it excludes the private address space, as the file name
; suggests -- confirm against the metadata at the end of the file.
4258b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 %index) {
4259b0a25468SMatt Arsenault; GFX7-LABEL: atomic_max_i64_addr64_offset:
4260b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
42616548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
42626548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4263b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
42646548b635SShilei Tian; GFX7-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
4265b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s4
4266b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s5
4267b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, 32
4268b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, 0
4269*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s1
4270*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s0
4271*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
4272*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
4273*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s3
4274*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s2
4275b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB88_1: ; %atomicrmw.start
4276b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4277b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4278b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
4279*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
4280*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
4281b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
4282b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4283b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4284b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
4285b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
4286*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4287b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
4288*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4289b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB88_1
4290b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4291b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
4292b0a25468SMatt Arsenault;
4293b0a25468SMatt Arsenault; GFX8-LABEL: atomic_max_i64_addr64_offset:
4294b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
42956548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
42966548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4297b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
42986548b635SShilei Tian; GFX8-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
4299b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s4
4300b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s5
4301b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, 32
4302b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, 0
4303*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s1
4304*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s0
4305*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
4306*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
4307*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s3
4308*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s2
4309b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB88_1: ; %atomicrmw.start
4310b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4311b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4312b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
4313*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
4314*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
4315b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
4316b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4317b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4318b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
4319b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
4320*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4321b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
4322*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4323b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB88_1
4324b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4325b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
4326b0a25468SMatt Arsenault;
4327b0a25468SMatt Arsenault; GFX9-LABEL: atomic_max_i64_addr64_offset:
4328b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
43296548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
43306548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4331b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
43326548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
43336548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s0, s4
43346548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s1, s5
4335*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s1
4336*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s0
4337*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[4:5] offset:32
4338*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
4339*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s3
4340*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s2
4341b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB88_1: ; %atomicrmw.start
4342b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4343b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
43446548b635SShilei Tian; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
4345*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
4346*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
4347b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
4348b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4349b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4350b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
4351b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
4352*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4353b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
4354*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4355b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB88_1
4356b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4357b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
4358b0a25468SMatt Arsenaultentry:
4359b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index
4360b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %ptr, i64 4
4361b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
4362b0a25468SMatt Arsenault  ret void
4363b0a25468SMatt Arsenault}
4364b0a25468SMatt Arsenault
; Kernel test: `atomicrmw max ... seq_cst` on the i64 located at
; %out[%index] plus 4 further i64 elements (two chained GEPs). The returned
; old value (%tmp0) is stored to %out2.
; The autogenerated checks below expect a compare-and-swap retry loop on every
; target (flat_atomic_cmpswap_x2 ... glc, repeated via s_cbranch_execnz).
; After the loop, exec is OR-ed back with the accumulated mask and the value
; left in v[2:3] is written with flat_store_dwordx2.
; The GFX7 and GFX8 checks add the 32-byte offset with s_add_u32/s_addc_u32,
; while the GFX9 checks carry it as offset:32 on the load and the cmpswap.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view; presumably it excludes the private address space, as the file name
; suggests -- confirm against the metadata at the end of the file.
4365b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) {
4366b0a25468SMatt Arsenault; GFX7-LABEL: atomic_max_i64_ret_addr64_offset:
4367b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
43686548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4369b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4370b0a25468SMatt Arsenault; GFX7-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
4371b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s6
4372b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s7
4373b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, 32
4374b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, 0
4375b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s0
4376b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s1
4377*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
4378*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
4379*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s5
4380*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s4
4381b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB89_1: ; %atomicrmw.start
4382b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4383b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4384*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v3
4385*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v2
4386*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
4387*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4388*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4389*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
4390b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4391b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4392*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
4393*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4394*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4395b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB89_1
4396b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4397*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[0:1]
4398*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v0, s2
4399*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v1, s3
4400*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4401b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
4402b0a25468SMatt Arsenault;
4403b0a25468SMatt Arsenault; GFX8-LABEL: atomic_max_i64_ret_addr64_offset:
4404b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
44056548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4406b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4407b0a25468SMatt Arsenault; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
4408b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s6
4409b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s7
4410b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, 32
4411b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, 0
4412b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s0
4413b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s1
4414*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
4415*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
4416*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s5
4417*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s4
4418b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB89_1: ; %atomicrmw.start
4419b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4420b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4421*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v3
4422*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v2
4423*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
4424*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4425*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4426*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
4427b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4428b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4429*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
4430*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4431*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4432b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB89_1
4433b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4434*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[0:1]
4435*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4436*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4437*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4438b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
4439b0a25468SMatt Arsenault;
4440b0a25468SMatt Arsenault; GFX9-LABEL: atomic_max_i64_ret_addr64_offset:
4441b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
44426548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
4443b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
44446548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[0:1], s[14:15], 3
44456548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s8, s0
44466548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s9, s1
4447b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s0
4448b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s1
4449*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1] offset:32
4450*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
4451*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s13
4452*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s12
4453b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB89_1: ; %atomicrmw.start
4454b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4455b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4456*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v3
4457*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v2
4458*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
4459*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4460*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4461*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc
4462b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4463b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4464*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
4465*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4466*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4467b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB89_1
4468b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4469*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 exec, exec, s[0:1]
4470*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v0, s10
4471*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v1, s11
4472*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4473b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
4474b0a25468SMatt Arsenaultentry:
4475b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index
4476b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %ptr, i64 4
4477b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
4478b0a25468SMatt Arsenault  store i64 %tmp0, ptr %out2
4479b0a25468SMatt Arsenault  ret void
4480b0a25468SMatt Arsenault}
4481b0a25468SMatt Arsenault
; Kernel test: `atomicrmw max ... seq_cst` on the i64 at %out[%index], with no
; additional constant offset. The returned old value (%tmp0) is unused.
; The autogenerated checks below expect the same compare-and-swap retry loop
; on every target (flat_load_dwordx2, then flat_atomic_cmpswap_x2 ... glc,
; repeated via s_cbranch_execnz); none of the memory instructions carries an
; offset operand here.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view; presumably it excludes the private address space, as the file name
; suggests -- confirm against the metadata at the end of the file.
4482b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index) {
4483b0a25468SMatt Arsenault; GFX7-LABEL: atomic_max_i64_addr64:
4484b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
44856548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
44866548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
4487b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
44886548b635SShilei Tian; GFX7-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
4489b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s4
4490b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s5
4491*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s1
4492*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s0
4493*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
4494*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
4495*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s3
4496*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s2
4497b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB90_1: ; %atomicrmw.start
4498b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4499b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4500b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
4501*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
4502*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
4503b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
4504b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4505b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4506b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
4507b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
4508*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4509b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
4510*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4511b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB90_1
4512b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4513b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
4514b0a25468SMatt Arsenault;
4515b0a25468SMatt Arsenault; GFX8-LABEL: atomic_max_i64_addr64:
4516b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
45176548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
45186548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4519b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
45206548b635SShilei Tian; GFX8-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
4521b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s4
4522b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s5
4523*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s1
4524*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s0
4525*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
4526*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
4527*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s3
4528*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s2
4529b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB90_1: ; %atomicrmw.start
4530b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4531b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4532b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
4533*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
4534*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
4535b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
4536b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4537b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4538b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
4539b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
4540*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4541b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
4542*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4543b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB90_1
4544b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4545b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
4546b0a25468SMatt Arsenault;
4547b0a25468SMatt Arsenault; GFX9-LABEL: atomic_max_i64_addr64:
4548b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
45496548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
45506548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
4551b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
45526548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
45536548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s0, s4
45546548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s1, s5
4555*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s1
4556*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s0
4557*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
4558*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
4559*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s3
4560*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s2
4561b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB90_1: ; %atomicrmw.start
4562b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4563b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
45646548b635SShilei Tian; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[2:3], v[2:3]
4565*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
4566*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
4567b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
4568b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4569b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4570b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
4571b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
4572*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4573b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
4574*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4575b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB90_1
4576b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4577b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
4578b0a25468SMatt Arsenaultentry:
4579b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index
4580b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
4581b0a25468SMatt Arsenault  ret void
4582b0a25468SMatt Arsenault}
4583b0a25468SMatt Arsenault
; Kernel test: `atomicrmw max ... seq_cst` on the i64 at %out[%index], with no
; additional constant offset. The returned old value (%tmp0) is stored to
; %out2.
; The autogenerated checks below expect a compare-and-swap retry loop on every
; target (flat_load_dwordx2, then flat_atomic_cmpswap_x2 ... glc, repeated via
; s_cbranch_execnz). After the loop, exec is OR-ed back with the accumulated
; mask and the value left in v[2:3] is written with flat_store_dwordx2.
; NOTE(review): !1 (the !noalias.addrspace operand) is defined outside this
; view; presumably it excludes the private address space, as the file name
; suggests -- confirm against the metadata at the end of the file.
4584b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %in, i64 %index) {
4585b0a25468SMatt Arsenault; GFX7-LABEL: atomic_max_i64_ret_addr64:
4586b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
45876548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
4588b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
4589b0a25468SMatt Arsenault; GFX7-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
4590b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s6
4591b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s7
4592b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s0
4593b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s1
4594*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
4595*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
4596*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s5
4597*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s4
4598b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB91_1: ; %atomicrmw.start
4599b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4600b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4601*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v3
4602*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v2
4603*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
4604*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4605*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4606*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
4607b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4608b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4609*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
4610*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4611*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4612b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB91_1
4613b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4614*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[0:1]
4615*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v0, s2
4616*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v1, s3
4617*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4618b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
4619b0a25468SMatt Arsenault;
4620b0a25468SMatt Arsenault; GFX8-LABEL: atomic_max_i64_ret_addr64:
4621b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
46226548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
4623b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
4624b0a25468SMatt Arsenault; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
4625b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s6
4626b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s7
4627b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s0
4628b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s1
4629*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
4630*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
4631*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s5
4632*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s4
4633b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB91_1: ; %atomicrmw.start
4634b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4635b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4636*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v3
4637*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v2
4638*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_lt_i64_e32 vcc, s[4:5], v[8:9]
4639*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4640*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4641*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
4642b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4643b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4644*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
4645*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4646*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4647b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB91_1
4648b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4649*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[0:1]
4650*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v0, s2
4651*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, s3
4652*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4653b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
4654b0a25468SMatt Arsenault;
4655b0a25468SMatt Arsenault; GFX9-LABEL: atomic_max_i64_ret_addr64:
4656b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
46576548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
4658b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
46596548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[0:1], s[14:15], 3
46606548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s8, s0
46616548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s9, s1
4662b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s0
4663b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s1
4664*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
4665*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
4666*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s13
4667*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s12
4668b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB91_1: ; %atomicrmw.start
4669b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4670b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4671*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v3
4672*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v2
4673*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_lt_i64_e32 vcc, s[12:13], v[8:9]
4674*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
4675*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
4676*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
4677b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4678b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4679*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
4680*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
4681*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
4682b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB91_1
4683b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4684*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 exec, exec, s[0:1]
4685*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v0, s10
4686*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v1, s11
4687*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
4688b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
4689b0a25468SMatt Arsenaultentry:
4690b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index
4691b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
4692b0a25468SMatt Arsenault  store i64 %tmp0, ptr %out2
4693b0a25468SMatt Arsenault  ret void
4694b0a25468SMatt Arsenault}
4695b0a25468SMatt Arsenault
4696b0a25468SMatt Arsenaultdefine void @flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
4697b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory:
4698b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
4699b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4700b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
4701b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
4702b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
4703b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4704b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
4705b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
4706b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4707b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB92_1: ; %atomicrmw.start
4708b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4709b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4710b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
4711b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
4712b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
4713b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
4714b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4715b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4716b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
4717b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
4718b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4719b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
4720b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4721b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB92_1
4722b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4723b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
4724b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
4725b0a25468SMatt Arsenault;
4726b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory:
4727b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
4728b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4729b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
4730b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
4731b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
4732b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4733b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
4734b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
4735b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
4736b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB92_1: ; %atomicrmw.start
4737b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4738b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4739b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
4740b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
4741b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
4742b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
4743b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4744b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4745b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
4746b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
4747b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4748b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
4749b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4750b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB92_1
4751b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4752b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
4753b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
4754b0a25468SMatt Arsenault;
4755b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory:
4756b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
4757b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4758b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
4759b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
4760b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB92_1: ; %atomicrmw.start
4761b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4762b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4763b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
4764b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
4765b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
4766b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
4767b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4768b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4769b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
4770b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
4771b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4772b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
4773b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4774b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB92_1
4775b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4776b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
4777b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
4778b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
4779b0a25468SMatt Arsenault  %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
4780b0a25468SMatt Arsenault  ret void
4781b0a25468SMatt Arsenault}
4782b0a25468SMatt Arsenault
; Signed-max atomicrmw (seq_cst) through a flat pointer at element index 4
; (byte offset 32), carrying !amdgpu.no.remote.memory, with the old value
; returned to the caller.
; The checks below expect a flat_atomic_cmpswap_x2 loop on every target:
;   * gfx7/gfx8 compute the +32 and +36 addresses with explicit adds and load
;     the initial value as two dwords;
;   * gfx9 folds the displacement into the instruction as offset:32.
; NOTE(review): !0 and !1 are defined outside this view - confirm their
; contents at the end of the file.
4783b0a25468SMatt Arsenaultdefine i64 @flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
4784b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory:
4785b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
4786b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4787b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
4788b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
4789b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
4790b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4791b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
4792b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
4793b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4794b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB93_1: ; %atomicrmw.start
4795b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4796b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4797b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
4798b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
4799b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3]
4800b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
4801b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
4802b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
4803b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4804b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4805b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
4806b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4807b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4808b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB93_1
4809b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4810b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
4811b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
4812b0a25468SMatt Arsenault;
4813b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory:
4814b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
4815b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4816b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
4817b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
4818b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
4819b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4820b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
4821b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
4822b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
4823b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB93_1: ; %atomicrmw.start
4824b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4825b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4826b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
4827b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
4828b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_i64_e32 vcc, v[8:9], v[2:3]
4829b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
4830b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
4831b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
4832b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4833b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4834b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
4835b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4836b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4837b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB93_1
4838b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4839b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
4840b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
4841b0a25468SMatt Arsenault;
4842b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory:
4843b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
4844b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4845b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
4846b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
4847b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB93_1: ; %atomicrmw.start
4848b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4849b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4850b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
4851b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
4852b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_i64_e32 vcc, v[6:7], v[2:3]
4853b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
4854b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
4855b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
4856b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4857b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4858b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
4859b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4860b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4861b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB93_1
4862b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4863b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
4864b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
4865b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
4866b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; Index 4 of an i64 array is 32 bytes past %out, matching the 32 checked above.
4867b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
4868b0a25468SMatt Arsenault  %result = atomicrmw max ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
4869b0a25468SMatt Arsenault  ret i64 %result
4870b0a25468SMatt Arsenault}
4871b0a25468SMatt Arsenault
4872b0a25468SMatt Arsenault; ---------------------------------------------------------------------
4873b0a25468SMatt Arsenault; atomicrmw umax
4874b0a25468SMatt Arsenault; ---------------------------------------------------------------------
4875b0a25468SMatt Arsenault
; Unsigned-max atomicrmw (seq_cst) directly on the flat pointer %ptr; the
; result %tmp0 is never used and the function returns void.
; The checks below expect a flat_atomic_cmpswap_x2 loop on every target,
; using an unsigned 64-bit compare (v_cmp_gt_u64) to pick the new value.
; gfx7/gfx8 load the initial value as two dwords (at %ptr and %ptr + 4);
; gfx9 uses a single flat_load_dwordx2.
; NOTE(review): !1 is defined outside this view - confirm its contents at the
; end of the file.
4876b0a25468SMatt Arsenaultdefine void @flat_atomic_umax_i64_noret(ptr %ptr, i64 %in) {
4877b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret:
4878b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
4879b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4880b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 4, v0
4881b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
4882b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[0:1]
4883b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[4:5]
4884b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4885b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB94_1: ; %atomicrmw.start
4886b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4887b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4888b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
4889b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
4890b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
4891b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
4892b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4893b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4894b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
4895b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
4896b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4897b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
4898b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4899b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB94_1
4900b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4901b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
4902b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
4903b0a25468SMatt Arsenault;
4904b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret:
4905b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
4906b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4907b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 4, v0
4908b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
4909b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[0:1]
4910b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[4:5]
4911b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
4912b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB94_1: ; %atomicrmw.start
4913b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
4914b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4915b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
4916b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
4917b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
4918b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
4919b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4920b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
4921b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
4922b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
4923b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4924b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
4925b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4926b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB94_1
4927b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
4928b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
4929b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
4930b0a25468SMatt Arsenault;
4931b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret:
4932b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
4933b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4934b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
4935b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
4936b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB94_1: ; %atomicrmw.start
4937b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
4938b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4939b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
4940b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
4941b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
4942b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
4943b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4944b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
4945b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
4946b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
4947b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4948b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
4949b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4950b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB94_1
4951b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
4952b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
4953b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
4954b0a25468SMatt Arsenault  %tmp0 = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
4955b0a25468SMatt Arsenault  ret void
4956b0a25468SMatt Arsenault}
4957b0a25468SMatt Arsenault
; Unsigned-max atomicrmw (seq_cst) through a flat pointer at element index 4
; (byte offset 32); the result %tmp0 is never used and the function returns
; void.
; The checks below expect a flat_atomic_cmpswap_x2 loop on every target:
;   * gfx7/gfx8 compute the +32 and +36 addresses with explicit adds and load
;     the initial value as two dwords;
;   * gfx9 folds the displacement into the instruction as offset:32.
; NOTE(review): !1 is defined outside this view - confirm its contents at the
; end of the file.
4958b0a25468SMatt Arsenaultdefine void @flat_atomic_umax_i64_noret_offset(ptr %out, i64 %in) {
4959b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret_offset:
4960b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
4961b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4962b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
4963b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
4964b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
4965b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4966b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
4967b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
4968b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
4969b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB95_1: ; %atomicrmw.start
4970b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
4971b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4972b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
4973b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
4974b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
4975b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
4976b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
4977b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
4978b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
4979b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
4980b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
4981b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
4982b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
4983b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB95_1
4984b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
4985b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
4986b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
4987b0a25468SMatt Arsenault;
4988b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret_offset:
4989b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
4990b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4991b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
4992b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
4993b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
4994b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4995b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
4996b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
4997b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
4998b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB95_1: ; %atomicrmw.start
4999b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5000b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5001b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
5002b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
5003b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
5004b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
5005b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5006b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5007b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
5008b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
5009b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5010b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
5011b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5012b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB95_1
5013b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5014b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
5015b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
5016b0a25468SMatt Arsenault;
5017b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret_offset:
5018b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
5019b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5020b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
5021b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
5022b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB95_1: ; %atomicrmw.start
5023b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5024b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5025b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
5026b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
5027b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
5028b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
5029b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5030b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5031b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
5032b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
5033b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5034b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
5035b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5036b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB95_1
5037b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5038b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
5039b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; Index 4 of an i64 array is 32 bytes past %out, matching the 32 checked above.
5040b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
5041b0a25468SMatt Arsenault  %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
5042b0a25468SMatt Arsenault  ret void
5043b0a25468SMatt Arsenault}
5044b0a25468SMatt Arsenault
; Unsigned-max atomicrmw (seq_cst) directly on the flat pointer %ptr, with the
; old value returned to the caller.
; The checks below expect a flat_atomic_cmpswap_x2 loop on every target,
; using an unsigned 64-bit compare (v_cmp_gt_u64) to pick the new value.
; After the loop the value left in v[4:5] is copied into v[0:1] before the
; return on all three targets.
; NOTE(review): !1 is defined outside this view - confirm its contents at the
; end of the file.
5045b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umax_i64_ret(ptr %ptr, i64 %in) {
5046b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret:
5047b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
5048b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5049b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v5, vcc, 4, v0
5050b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
5051b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v4, v[0:1]
5052b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v5, v[5:6]
5053b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
5054b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB96_1: ; %atomicrmw.start
5055b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5056b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5057b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
5058b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
5059b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
5060b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
5061b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
5062b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
5063b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5064b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5065b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
5066b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5067b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5068b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB96_1
5069b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5070b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
5071b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, v4
5072b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, v5
5073b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
5074b0a25468SMatt Arsenault;
5075b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret:
5076b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
5077b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5078b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 4, v0
5079b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
5080b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v4, v[0:1]
5081b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v5, v[5:6]
5082b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
5083b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB96_1: ; %atomicrmw.start
5084b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5085b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5086b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
5087b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
5088b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
5089b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
5090b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
5091b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
5092b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5093b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5094b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
5095b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5096b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5097b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB96_1
5098b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5099b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
5100b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, v4
5101b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, v5
5102b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
5103b0a25468SMatt Arsenault;
5104b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret:
5105b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
5106b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5107b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
5108b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
5109b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB96_1: ; %atomicrmw.start
5110b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5111b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5112b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
5113b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
5114b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
5115b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
5116b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
5117b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
5118b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5119b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5120b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
5121b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5122b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5123b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB96_1
5124b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5125b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
5126b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
5127b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
5128b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
5129b0a25468SMatt Arsenault  %result = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
5130b0a25468SMatt Arsenault  ret i64 %result
5131b0a25468SMatt Arsenault}
5132b0a25468SMatt Arsenault
; Unsigned-max atomicrmw (seq_cst) through a flat pointer at element index 4
; (byte offset 32), with the old value returned to the caller.
; The checks below expect a flat_atomic_cmpswap_x2 loop on every target:
;   * gfx7/gfx8 compute the +32 and +36 addresses with explicit adds, load the
;     initial value as two dwords, and have the cmpswap write straight into
;     v[0:1], so no copy follows the loop;
;   * gfx9 folds the displacement into the instruction as offset:32 and copies
;     v[4:5] into v[0:1] after the loop.
; NOTE(review): !1 is defined outside this view - confirm its contents at the
; end of the file.
5133b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umax_i64_ret_offset(ptr %out, i64 %in) {
5134b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret_offset:
5135b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
5136b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5137b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
5138b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
5139b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
5140b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5141b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
5142b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
5143b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
5144b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB97_1: ; %atomicrmw.start
5145b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5146b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5147b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
5148b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
5149b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3]
5150b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
5151b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
5152b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
5153b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5154b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5155b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
5156b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5157b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5158b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB97_1
5159b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5160b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
5161b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
5162b0a25468SMatt Arsenault;
5163b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret_offset:
5164b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
5165b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5166b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
5167b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
5168b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
5169b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5170b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
5171b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
5172b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
5173b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB97_1: ; %atomicrmw.start
5174b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5175b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5176b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
5177b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
5178b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3]
5179b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
5180b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
5181b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
5182b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5183b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5184b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
5185b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5186b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5187b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB97_1
5188b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5189b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
5190b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
5191b0a25468SMatt Arsenault;
5192b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret_offset:
5193b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
5194b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5195b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
5196b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
5197b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB97_1: ; %atomicrmw.start
5198b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5199b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5200b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
5201b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
5202b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
5203b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
5204b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
5205b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
5206b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5207b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5208b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
5209b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5210b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
5211b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB97_1
5212b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5213b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
5214b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
5215b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
5216b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; Index 4 of an i64 array is 32 bytes past %out, matching the 32 checked above.
5217b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
5218b0a25468SMatt Arsenault  %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
5219b0a25468SMatt Arsenault  ret i64 %result
5220b0a25468SMatt Arsenault}
5221b0a25468SMatt Arsenault
; Test - seq_cst i64 atomicrmw umax through a flat pointer, with the pointer
; and the operand both passed inreg under the amdgpu_gfx calling convention.
; The old value is not used and the function returns void.
;
; The assertions below expect a compare-and-swap retry loop on all three
; targets. The initial value is loaded before the loop, the value to store is
; chosen with v_cmp_lt_u64 plus two v_cndmask_b32, and flat_atomic_cmpswap_x2
; is repeated until the value it returns equals the value it was compared
; against. GFX7 and GFX8 load the initial value with two flat_load_dword
; (the second from the pointer plus 4), GFX9 with one flat_load_dwordx2.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; NOTE(review) - !1 is defined elsewhere in this file. Presumably it excludes
; the private address space, going by the file name. Confirm.
5222b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_umax_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
5223b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret_scalar:
5224b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
5225b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5226b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5227b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
5228b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5229b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
5230b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s34
5231b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s35
5232b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[0:1]
5233b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[3:4]
5234*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s4
5235b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
5236*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s7
5237*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s6
5238*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s5
5239b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB98_1: ; %atomicrmw.start
5240b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5241b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5242b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
5243*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5244*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5245b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
5246b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5247b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5248b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5249b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
5250b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5251b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
5252b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5253b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB98_1
5254b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5255b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
5256b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
5257b0a25468SMatt Arsenault;
5258b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret_scalar:
5259b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
5260b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5261b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
5262b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
5263b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
5264b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
5265b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s34
5266b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s35
5267b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[0:1]
5268b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[3:4]
5269*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s4
5270b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
5271*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s7
5272*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s6
5273*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s5
5274b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB98_1: ; %atomicrmw.start
5275b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5276b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5277b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
5278*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5279*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5280b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
5281b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5282b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5283b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5284b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
5285b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5286b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
5287b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5288b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB98_1
5289b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5290b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
5291b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
5292b0a25468SMatt Arsenault;
5293b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret_scalar:
5294b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
5295b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5296b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
5297b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
5298b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
5299*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
5300b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
5301*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s7
5302*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s6
5303*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
5304b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB98_1: ; %atomicrmw.start
5305b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5306b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5307b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
5308*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5309*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5310b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
5311b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5312b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5313b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5314b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
5315b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5316b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
5317b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5318b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB98_1
5319b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5320b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
5321b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; The old value (%tmp0) is never read.
5322b0a25468SMatt Arsenault  %tmp0 = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
5323b0a25468SMatt Arsenault  ret void
5324b0a25468SMatt Arsenault}
5325b0a25468SMatt Arsenault
; Test - seq_cst i64 atomicrmw umax through a flat pointer at a constant
; offset (element index 4 of an i64 array, so 32 bytes past %out). The
; pointer and the operand are passed inreg under the amdgpu_gfx calling
; convention. The old value is not used and the function returns void.
;
; The assertions below expect a compare-and-swap retry loop on all three
; targets. The targets differ in how the 32-byte offset is applied. GFX7 and
; GFX8 materialize the address with scalar adds (base plus 32 for the low
; dword and for the cmpswap, base plus 36 for the high dword) and issue two
; flat_load_dword. GFX9 keeps the base address in registers and puts the
; offset in the immediate offset field of both flat_load_dwordx2 and
; flat_atomic_cmpswap_x2.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; NOTE(review) - !1 is defined elsewhere in this file. Presumably it excludes
; the private address space, going by the file name. Confirm.
5326b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
5327b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret_offset_scalar:
5328b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
5329b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5330b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
5331b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
5332b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
5333b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
5334b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
5335b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
5336b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s34
5337b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v5, s35
5338b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[0:1]
5339b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[4:5]
5340*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
5341*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s7
5342*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s6
5343b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB99_1: ; %atomicrmw.start
5344b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5345b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5346b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
5347*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5348*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5349b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
5350b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5351b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5352b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5353b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
5354*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5355b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
5356*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5357b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB99_1
5358b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5359*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
5360b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
5361b0a25468SMatt Arsenault;
5362b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret_offset_scalar:
5363b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
5364b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5365b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
5366b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
5367b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
5368b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
5369b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
5370b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
5371b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s34
5372b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v5, s35
5373b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[0:1]
5374b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[4:5]
5375*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
5376*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s7
5377*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s6
5378b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB99_1: ; %atomicrmw.start
5379b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5380b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5381b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
5382*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5383*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5384b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
5385b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5386b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5387b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5388b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
5389*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5390b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
5391*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5392b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB99_1
5393b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5394*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
5395b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
5396b0a25468SMatt Arsenault;
5397b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret_offset_scalar:
5398b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
5399b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5400b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
5401b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
5402b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1] offset:32
5403*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
5404b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
5405*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s7
5406*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s6
5407*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
5408b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB99_1: ; %atomicrmw.start
5409b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5410b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5411b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[2:3]
5412*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5413*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5414b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
5415b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5416b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5417b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5418b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
5419b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5420b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
5421b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5422b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB99_1
5423b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5424b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
5425b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; Index 4 of an i64 array is the 32-byte offset seen in the assertions above.
5426b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
5427b0a25468SMatt Arsenault  %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
5428b0a25468SMatt Arsenault  ret void
5429b0a25468SMatt Arsenault}
5430b0a25468SMatt Arsenault
; Test - seq_cst i64 atomicrmw umax through a flat pointer, with the pointer
; and the operand both passed inreg under the amdgpu_gfx calling convention.
; Unlike the noret variant, the old value is returned as the i64 result.
;
; The assertions below expect a compare-and-swap retry loop on all three
; targets. The initial load and every flat_atomic_cmpswap_x2 write their
; result to v[0:1]. Each iteration first copies that value into v[8:9],
; where it serves as the compare half of the cmpswap data operand v[6:9]
; and as the reference for the loop exit test. No register copy is emitted
; between the end of the loop and the return. GFX7 and GFX8 load the initial
; value with two flat_load_dword (the second from the pointer plus 4), GFX9
; with one flat_load_dwordx2.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; NOTE(review) - !1 is defined elsewhere in this file. Presumably it excludes
; the private address space, going by the file name. Confirm.
5431b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_umax_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
5432b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret_scalar:
5433b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
5434b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5435b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
5436b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
5437b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
5438b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
5439b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
5440b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
5441b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[0:1]
5442b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[2:3]
5443*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v2, s4
5444b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
5445*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s7
5446*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s6
5447*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v3, s5
5448b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB100_1: ; %atomicrmw.start
5449b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5450b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5451*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v1
5452*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v0
5453*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
5454*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5455*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5456*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
5457b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5458b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5459*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
5460b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5461b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5462b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB100_1
5463b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5464b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
5465b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
5466b0a25468SMatt Arsenault;
5467b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret_scalar:
5468b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
5469b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5470b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
5471b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
5472b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
5473b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
5474b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
5475b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
5476b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[0:1]
5477b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[2:3]
5478*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, s4
5479b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
5480*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s7
5481*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s6
5482*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v3, s5
5483b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB100_1: ; %atomicrmw.start
5484b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5485b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5486*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v1
5487*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v0
5488*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
5489*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5490*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5491*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
5492b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5493b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5494*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
5495b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5496b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5497b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB100_1
5498b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5499b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
5500b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
5501b0a25468SMatt Arsenault;
5502b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret_scalar:
5503b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
5504b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5505b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
5506b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
5507b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
5508*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
5509b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
5510*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s7
5511*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s6
5512*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
5513b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB100_1: ; %atomicrmw.start
5514b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5515b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5516*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v1
5517*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v0
5518*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
5519*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5520*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5521*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
5522b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5523b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5524*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
5525b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5526b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5527b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB100_1
5528b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5529b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
5530b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; The old value (%result) is the function's return value.
5531b0a25468SMatt Arsenault  %result = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
5532b0a25468SMatt Arsenault  ret i64 %result
5533b0a25468SMatt Arsenault}
5534b0a25468SMatt Arsenault
; Test - seq_cst i64 atomicrmw umax through a flat pointer at a constant
; offset (element index 4 of an i64 array, so 32 bytes past %out). The
; pointer and the operand are passed inreg under the amdgpu_gfx calling
; convention, and the old value is returned as the i64 result.
;
; The assertions below expect a compare-and-swap retry loop on all three
; targets. The initial load and every flat_atomic_cmpswap_x2 write their
; result to v[0:1], and each iteration copies it into v[8:9] to use as the
; compare value. No register copy is emitted between the end of the loop and
; the return. GFX7 and GFX8 materialize the address with scalar adds (base
; plus 32 for the low dword and for the cmpswap, base plus 36 for the high
; dword). GFX9 keeps the base address in registers and puts the offset in
; the immediate offset field of both flat_load_dwordx2 and
; flat_atomic_cmpswap_x2.
;
; The assertions are autogenerated (see the NOTE on the first line of this
; file). Regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
; NOTE(review) - !1 is defined elsewhere in this file. Presumably it excludes
; the private address space, going by the file name. Confirm.
5535b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
5536b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret_offset_scalar:
5537b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
5538b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5539b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
5540b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
5541b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
5542b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
5543b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
5544b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
5545b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
5546b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
5547b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
5548b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[2:3]
5549*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
5550*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s7
5551*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s6
5552b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB101_1: ; %atomicrmw.start
5553b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5554b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5555*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v1
5556*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v0
5557*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
5558*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5559*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5560*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
5561b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5562b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5563*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
5564*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5565*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5566b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB101_1
5567b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5568*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
5569b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
5570b0a25468SMatt Arsenault;
5571b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret_offset_scalar:
5572b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
5573b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5574b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
5575b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
5576b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
5577b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
5578b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
5579b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
5580b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
5581b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
5582b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
5583b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[2:3]
5584*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
5585*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s7
5586*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s6
5587b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB101_1: ; %atomicrmw.start
5588b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5589b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5590*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v1
5591*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v0
5592*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
5593*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5594*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5595*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
5596b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5597b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5598*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
5599*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5600*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5601b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB101_1
5602b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5603*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
5604b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
5605b0a25468SMatt Arsenault;
5606b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret_offset_scalar:
5607b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
5608b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5609b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
5610b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
5611b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] offset:32
5612*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
5613b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
5614*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s7
5615*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s6
5616*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
5617b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB101_1: ; %atomicrmw.start
5618b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5619b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5620*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v1
5621*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v0
5622*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[6:7], v[8:9]
5623*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5624*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5625*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc
5626b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5627b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5628*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
5629b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
5630b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
5631b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB101_1
5632b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5633b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
5634b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; Index 4 of an i64 array is the 32-byte offset seen in the assertions above.
5635b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
5636b0a25468SMatt Arsenault  %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
5637b0a25468SMatt Arsenault  ret i64 %result
5638b0a25468SMatt Arsenault}
5639b0a25468SMatt Arsenault
; Kernel test: seq_cst unsigned-max atomicrmw on a flat pointer formed as
; %out + %index elements + 4 elements; the old value it returns is unused.
; The check lines for all three targets show the operation expanded into a
; flat_atomic_cmpswap_x2 retry loop (%atomicrmw.start) that is seeded by a
; flat_load_dwordx2 and exits once exec is empty. The GFX7 and GFX8 checks add
; the 32-byte constant offset with s_add_u32/s_addc_u32, while the GFX9 checks
; encode it as offset:32 on both the load and the cmpswap.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
5640b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64 %index) {
5641b0a25468SMatt Arsenault; GFX7-LABEL: atomic_umax_i64_addr64_offset:
5642b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
56436548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
56446548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
5645b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
56466548b635SShilei Tian; GFX7-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
5647b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s4
5648b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s5
5649b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, 32
5650b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, 0
5651*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s1
5652*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s0
5653*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
5654*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
5655*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s3
5656*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s2
5657b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB102_1: ; %atomicrmw.start
5658b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5659b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5660b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
5661*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5662*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5663b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
5664b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5665b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5666b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5667b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
5668*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5669b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
5670*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5671b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB102_1
5672b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5673b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
5674b0a25468SMatt Arsenault;
5675b0a25468SMatt Arsenault; GFX8-LABEL: atomic_umax_i64_addr64_offset:
5676b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
56776548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
56786548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5679b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
56806548b635SShilei Tian; GFX8-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
5681b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s4
5682b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s5
5683b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, 32
5684b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, 0
5685*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s1
5686*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s0
5687*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
5688*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
5689*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s3
5690*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s2
5691b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB102_1: ; %atomicrmw.start
5692b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5693b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5694b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
5695*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5696*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5697b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
5698b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5699b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5700b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5701b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
5702*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5703b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
5704*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5705b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB102_1
5706b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5707b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
5708b0a25468SMatt Arsenault;
5709b0a25468SMatt Arsenault; GFX9-LABEL: atomic_umax_i64_addr64_offset:
5710b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
57116548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
57126548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
5713b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
57146548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
57156548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s0, s4
57166548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s1, s5
5717*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s1
5718*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s0
5719*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[4:5] offset:32
5720*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
5721*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s3
5722*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s2
5723b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB102_1: ; %atomicrmw.start
5724b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5725b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
57266548b635SShilei Tian; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[2:3], v[2:3]
5727*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
5728*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
5729b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
5730b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5731b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5732b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
5733b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
5734*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5735b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
5736*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5737b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB102_1
5738b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5739b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
5740b0a25468SMatt Arsenaultentry:
5741b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index ; element-indexed base (index scaled by 8 bytes)
5742b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %ptr, i64 4 ; 4 x i64 = 32 bytes past %ptr
5743b0a25468SMatt Arsenault  %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ; %tmp0 is never used (no-return form)
5744b0a25468SMatt Arsenault  ret void
5745b0a25468SMatt Arsenault}
5746b0a25468SMatt Arsenault
; Kernel test: seq_cst unsigned-max atomicrmw on %out + %index elements +
; 4 elements, with the returned old value stored through %out2.
; The check lines show the same flat_atomic_cmpswap_x2 retry loop as the
; no-return variant, but the loop keeps the expected value in v[8:9] and the
; cmpswap result in v[2:3]; after the loop exec is restored with s_or_b64 and
; v[2:3] is written with flat_store_dwordx2. The GFX7 and GFX8 checks add the
; 32-byte offset with scalar adds, while the GFX9 checks use offset:32.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
5747b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) {
5748b0a25468SMatt Arsenault; GFX7-LABEL: atomic_umax_i64_ret_addr64_offset:
5749b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
57506548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5751b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5752b0a25468SMatt Arsenault; GFX7-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
5753b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s6
5754b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s7
5755b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, 32
5756b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, 0
5757b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s0
5758b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s1
5759*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
5760*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
5761*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s5
5762*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s4
5763b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB103_1: ; %atomicrmw.start
5764b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5765b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5766*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v3
5767*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v2
5768*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
5769*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5770*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5771*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
5772b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5773b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5774*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
5775*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5776*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5777b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB103_1
5778b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5779*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[0:1]
5780*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v0, s2
5781*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v1, s3
5782*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5783b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
5784b0a25468SMatt Arsenault;
5785b0a25468SMatt Arsenault; GFX8-LABEL: atomic_umax_i64_ret_addr64_offset:
5786b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
57876548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5788b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
5789b0a25468SMatt Arsenault; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
5790b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s6
5791b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s7
5792b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, 32
5793b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, 0
5794b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s0
5795b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s1
5796*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
5797*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
5798*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s5
5799*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s4
5800b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB103_1: ; %atomicrmw.start
5801b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5802b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5803*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v3
5804*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v2
5805*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
5806*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5807*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5808*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
5809b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5810b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5811*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
5812*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5813*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5814b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB103_1
5815b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5816*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[0:1]
5817*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v0, s2
5818*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, s3
5819*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5820b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
5821b0a25468SMatt Arsenault;
5822b0a25468SMatt Arsenault; GFX9-LABEL: atomic_umax_i64_ret_addr64_offset:
5823b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
58246548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
5825b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
58266548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[0:1], s[14:15], 3
58276548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s8, s0
58286548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s9, s1
5829b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s0
5830b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s1
5831*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1] offset:32
5832*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
5833*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s13
5834*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s12
5835b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB103_1: ; %atomicrmw.start
5836b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5837b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5838*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v3
5839*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v2
5840*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
5841*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5842*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5843*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc
5844b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5845b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5846*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
5847*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5848*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5849b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB103_1
5850b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5851*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 exec, exec, s[0:1]
5852*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v0, s10
5853*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v1, s11
5854*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5855b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
5856b0a25468SMatt Arsenaultentry:
5857b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index ; element-indexed base (index scaled by 8 bytes)
5858b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %ptr, i64 4 ; 4 x i64 = 32 bytes past %ptr
5859b0a25468SMatt Arsenault  %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
5860b0a25468SMatt Arsenault  store i64 %tmp0, ptr %out2 ; keeps the atomic's old value live (returning form)
5861b0a25468SMatt Arsenault  ret void
5862b0a25468SMatt Arsenault}
5863b0a25468SMatt Arsenault
; Kernel test: seq_cst unsigned-max atomicrmw on %out + %index elements with
; no additional constant offset; the returned old value is stored through
; %out2.
; The check lines show the flat_atomic_cmpswap_x2 retry loop followed by a
; flat_store_dwordx2 of the result. Because the IR has no constant offset,
; none of the three targets emits a scalar add of 32 or an offset:32 modifier
; here.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
5864b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 %in, i64 %index) {
5865b0a25468SMatt Arsenault; GFX7-LABEL: atomic_umax_i64_ret_addr64:
5866b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
58676548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
5868b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
5869b0a25468SMatt Arsenault; GFX7-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
5870b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s6
5871b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s7
5872b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s0
5873b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s1
5874*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
5875*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
5876*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s5
5877*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s4
5878b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB104_1: ; %atomicrmw.start
5879b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5880b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5881*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v3
5882*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v2
5883*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
5884*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5885*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5886*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
5887b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5888b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5889*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
5890*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5891*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5892b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB104_1
5893b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
5894*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[0:1]
5895*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v0, s2
5896*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v1, s3
5897*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5898b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
5899b0a25468SMatt Arsenault;
5900b0a25468SMatt Arsenault; GFX8-LABEL: atomic_umax_i64_ret_addr64:
5901b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
59026548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
5903b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
5904b0a25468SMatt Arsenault; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
5905b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s6
5906b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s7
5907b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s0
5908b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s1
5909*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
5910*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
5911*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s5
5912*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s4
5913b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB104_1: ; %atomicrmw.start
5914b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
5915b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5916*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v3
5917*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v2
5918*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, s[4:5], v[8:9]
5919*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5920*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5921*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
5922b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5923b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
5924*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
5925*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5926*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5927b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB104_1
5928b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
5929*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[0:1]
5930*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v0, s2
5931*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, s3
5932*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5933b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
5934b0a25468SMatt Arsenault;
5935b0a25468SMatt Arsenault; GFX9-LABEL: atomic_umax_i64_ret_addr64:
5936b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
59376548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
5938b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
59396548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[0:1], s[14:15], 3
59406548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s8, s0
59416548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s9, s1
5942b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s0
5943b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s1
5944*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
5945*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
5946*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s13
5947*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s12
5948b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB104_1: ; %atomicrmw.start
5949b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
5950b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5951*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v3
5952*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v2
5953*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, s[12:13], v[8:9]
5954*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
5955*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
5956*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
5957b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5958b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
5959*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
5960*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
5961*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
5962b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB104_1
5963b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
5964*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 exec, exec, s[0:1]
5965*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v0, s10
5966*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v1, s11
5967*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
5968b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
5969b0a25468SMatt Arsenaultentry:
5970b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index ; element-indexed address, no constant offset
5971b0a25468SMatt Arsenault  %tmp0 = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
5972b0a25468SMatt Arsenault  store i64 %tmp0, ptr %out2 ; keeps the atomic's old value live (returning form)
5973b0a25468SMatt Arsenault  ret void
5974b0a25468SMatt Arsenault}
5975b0a25468SMatt Arsenault
; Non-kernel function test: seq_cst unsigned-max atomicrmw at %out + 4
; elements, tagged with !amdgpu.no.remote.memory !0 in addition to
; !noalias.addrspace !1; the old value is unused.
; Even with the extra metadata, the check lines for all three targets still
; show a flat_atomic_cmpswap_x2 retry loop (%atomicrmw.start). The GFX7 and
; GFX8 checks materialize %out+32 and %out+36 with vector adds and seed the
; loop with two flat_load_dword instructions, while the GFX9 checks use a
; single flat_load_dwordx2 with offset:32. The function returns with
; s_setpc_b64 s[30:31].
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
5976b0a25468SMatt Arsenaultdefine void @flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
5977b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory:
5978b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
5979b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5980b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
5981b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
5982b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
5983b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
5984b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
5985b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
5986b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
5987b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB105_1: ; %atomicrmw.start
5988b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
5989b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5990b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
5991b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
5992b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
5993b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
5994b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
5995b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
5996b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
5997b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
5998b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
5999b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
6000b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6001b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB105_1
6002b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6003b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
6004b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6005b0a25468SMatt Arsenault;
6006b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory:
6007b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6008b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6009b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
6010b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
6011b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
6012b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6013b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
6014b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
6015b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
6016b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB105_1: ; %atomicrmw.start
6017b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6018b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6019b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
6020b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6021b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6022b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
6023b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6024b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6025b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
6026b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
6027b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6028b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
6029b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6030b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB105_1
6031b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6032b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
6033b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6034b0a25468SMatt Arsenault;
6035b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory:
6036b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6037b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6038b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
6039b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
6040b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB105_1: ; %atomicrmw.start
6041b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6042b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6043b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
6044b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6045b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6046b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
6047b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6048b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6049b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6050b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
6051b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6052b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
6053b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6054b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB105_1
6055b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6056b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
6057b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
6058b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4 ; 4 x i64 = 32 bytes past %out
6059b0a25468SMatt Arsenault  %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
6060b0a25468SMatt Arsenault  ret void
6061b0a25468SMatt Arsenault}
6062b0a25468SMatt Arsenault
; Non-kernel function test: seq_cst unsigned-max atomicrmw at %out + 4
; elements, tagged with !amdgpu.no.remote.memory !0 in addition to
; !noalias.addrspace !1; the old value is returned as the i64 result.
; The check lines for all three targets show a flat_atomic_cmpswap_x2 retry
; loop (%atomicrmw.start). In the GFX7 and GFX8 checks the cmpswap writes its
; result to v[0:1], so nothing further is copied before s_setpc_b64; in the
; GFX9 checks the result lands in v[4:5] and is moved to v0/v1 after the loop.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
6063b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
6064b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory:
6065b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6066b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6067b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
6068b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
6069b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
6070b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6071b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
6072b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
6073b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
6074b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB106_1: ; %atomicrmw.start
6075b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6076b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6077b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
6078b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
6079b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3]
6080b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
6081b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
6082b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
6083b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6084b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6085b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6086b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6087b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6088b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB106_1
6089b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6090b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
6091b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6092b0a25468SMatt Arsenault;
6093b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory:
6094b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6095b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6096b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
6097b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
6098b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
6099b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6100b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
6101b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
6102b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
6103b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB106_1: ; %atomicrmw.start
6104b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6105b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6106b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
6107b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
6108b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[8:9], v[2:3]
6109b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
6110b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
6111b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
6112b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6113b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6114b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6115b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6116b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6117b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB106_1
6118b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6119b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
6120b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6121b0a25468SMatt Arsenault;
6122b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory:
6123b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6124b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6125b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
6126b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
6127b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB106_1: ; %atomicrmw.start
6128b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6129b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6130b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
6131b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
6132b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[6:7], v[2:3]
6133b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6134b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6135b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
6136b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6137b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6138b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6139b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6140b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6141b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB106_1
6142b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6143b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
6144b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
6145b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
6146b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
6147b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4 ; 4 x i64 = 32 bytes past %out
6148b0a25468SMatt Arsenault  %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
6149b0a25468SMatt Arsenault  ret i64 %result ; returning form: the atomic's old value is the function result
6150b0a25468SMatt Arsenault}
6151b0a25468SMatt Arsenault
6152b0a25468SMatt Arsenault; ---------------------------------------------------------------------
6153b0a25468SMatt Arsenault; atomicrmw umin
6154b0a25468SMatt Arsenault; ---------------------------------------------------------------------
6155b0a25468SMatt Arsenault
; Test: i64 `atomicrmw umin` (seq_cst) directly on the flat pointer %ptr,
; with the old value unused (the function returns void).
; The checks below expect a compare-and-swap retry loop (%atomicrmw.start)
; built on flat_atomic_cmpswap_x2; no native umin atomic appears in them.
; The GFX7 and GFX8 checks read the initial value with two flat_load_dword
; (low half at %ptr, high half at %ptr+4); the GFX9 checks use a single
; flat_load_dwordx2.
; NOTE(review): !1 is defined outside this part of the file; presumably it
; excludes the private address space (per the test file name) - confirm.
6156b0a25468SMatt Arsenaultdefine void @flat_atomic_umin_i64_noret(ptr %ptr, i64 %in) {
6157b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret:
6158b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6159b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6160b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 4, v0
6161b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
6162b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[0:1]
6163b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[4:5]
6164b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
6165b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB107_1: ; %atomicrmw.start
6166b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6167b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6168b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6169b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6170b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6171b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
6172b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6173b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6174b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6175b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
6176b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6177b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
6178b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6179b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB107_1
6180b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6181b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
6182b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6183b0a25468SMatt Arsenault;
6184b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret:
6185b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6186b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6187b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 4, v0
6188b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
6189b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[0:1]
6190b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[4:5]
6191b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
6192b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB107_1: ; %atomicrmw.start
6193b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6194b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6195b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6196b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6197b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6198b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
6199b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6200b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6201b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6202b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
6203b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6204b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
6205b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6206b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB107_1
6207b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6208b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
6209b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6210b0a25468SMatt Arsenault;
6211b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret:
6212b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6213b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6214b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
6215b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
6216b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB107_1: ; %atomicrmw.start
6217b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6218b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6219b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6220b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6221b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6222b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
6223b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6224b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6225b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6226b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
6227b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6228b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
6229b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6230b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB107_1
6231b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6232b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
6233b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; The old value (%tmp0) is never read; only the memory update matters here.
6234b0a25468SMatt Arsenault  %tmp0 = atomicrmw umin ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
6235b0a25468SMatt Arsenault  ret void
6236b0a25468SMatt Arsenault}
6237b0a25468SMatt Arsenault
; Test: i64 `atomicrmw umin` (seq_cst) on element 4 of the flat pointer %out
; (a 32-byte displacement), with the old value unused (returns void).
; The checks below expect a compare-and-swap retry loop (%atomicrmw.start)
; built on flat_atomic_cmpswap_x2.
; The GFX7 and GFX8 checks form the low/high dword addresses with explicit
; adds of 32 and 36; the GFX9 checks carry the displacement on the memory
; instructions as `offset:32`.
; NOTE(review): !1 is defined outside this part of the file; presumably it
; excludes the private address space (per the test file name) - confirm.
6238b0a25468SMatt Arsenaultdefine void @flat_atomic_umin_i64_noret_offset(ptr %out, i64 %in) {
6239b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret_offset:
6240b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6241b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6242b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
6243b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
6244b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
6245b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6246b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
6247b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
6248b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
6249b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB108_1: ; %atomicrmw.start
6250b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6251b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6252b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6253b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6254b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6255b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
6256b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6257b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6258b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
6259b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
6260b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6261b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
6262b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6263b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB108_1
6264b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6265b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
6266b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6267b0a25468SMatt Arsenault;
6268b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret_offset:
6269b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6270b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6271b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
6272b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
6273b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
6274b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6275b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
6276b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
6277b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
6278b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB108_1: ; %atomicrmw.start
6279b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6280b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6281b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6282b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6283b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6284b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
6285b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6286b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6287b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
6288b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
6289b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6290b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
6291b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6292b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB108_1
6293b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6294b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
6295b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6296b0a25468SMatt Arsenault;
6297b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret_offset:
6298b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6299b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6300b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
6301b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
6302b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB108_1: ; %atomicrmw.start
6303b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6304b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6305b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6306b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6307b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6308b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
6309b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6310b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6311b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6312b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
6313b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6314b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
6315b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6316b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB108_1
6317b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6318b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
6319b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Index 4 of an i64 array: this is the source of the 32-byte displacement
  ; that shows up in the checks above.
6320b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
6321b0a25468SMatt Arsenault  %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
6322b0a25468SMatt Arsenault  ret void
6323b0a25468SMatt Arsenault}
6324b0a25468SMatt Arsenault
; Test: i64 `atomicrmw umin` (seq_cst) directly on the flat pointer %ptr,
; returning the value that was in memory before the operation.
; The checks below expect a compare-and-swap retry loop (%atomicrmw.start)
; built on flat_atomic_cmpswap_x2. Each iteration first copies the last
; observed value into v[6:7] (the compare operand), and after the loop the
; result in v[4:5] is moved into v0/v1 before returning.
; The GFX7 and GFX8 checks read the initial value with two flat_load_dword;
; the GFX9 checks use a single flat_load_dwordx2.
; NOTE(review): !1 is defined outside this part of the file; presumably it
; excludes the private address space (per the test file name) - confirm.
6325b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umin_i64_ret(ptr %ptr, i64 %in) {
6326b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret:
6327b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6328b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6329b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v5, vcc, 4, v0
6330b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
6331b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v4, v[0:1]
6332b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v5, v[5:6]
6333b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
6334b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB109_1: ; %atomicrmw.start
6335b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6336b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6337b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
6338b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
6339b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6340b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6341b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6342b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
6343b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6344b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6345b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6346b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6347b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6348b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB109_1
6349b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6350b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
6351b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, v4
6352b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, v5
6353b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6354b0a25468SMatt Arsenault;
6355b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret:
6356b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6357b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6358b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 4, v0
6359b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
6360b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v4, v[0:1]
6361b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v5, v[5:6]
6362b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
6363b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB109_1: ; %atomicrmw.start
6364b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6365b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6366b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
6367b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
6368b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6369b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6370b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6371b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
6372b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6373b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6374b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6375b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6376b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6377b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB109_1
6378b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6379b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
6380b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, v4
6381b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, v5
6382b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6383b0a25468SMatt Arsenault;
6384b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret:
6385b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6386b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6387b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
6388b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
6389b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB109_1: ; %atomicrmw.start
6390b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6391b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6392b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
6393b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
6394b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6395b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6396b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6397b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
6398b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6399b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6400b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6401b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6402b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6403b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB109_1
6404b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6405b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
6406b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
6407b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
6408b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; %result is the pre-operation memory value and is what the function returns.
6409b0a25468SMatt Arsenault  %result = atomicrmw umin ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
6410b0a25468SMatt Arsenault  ret i64 %result
6411b0a25468SMatt Arsenault}
6412b0a25468SMatt Arsenault
; Test: i64 `atomicrmw umin` (seq_cst) on element 4 of the flat pointer %out
; (a 32-byte displacement), returning the pre-operation memory value.
; The checks below expect a compare-and-swap retry loop (%atomicrmw.start)
; built on flat_atomic_cmpswap_x2.
; In the GFX7 and GFX8 checks the address is computed into v[4:5] (plus a
; second add of 36 for the high dword load) and the cmpswap result lands
; directly in v[0:1], so no copies follow the loop. In the GFX9 checks the
; displacement is carried as `offset:32` and the result is moved from
; v[4:5] into v0/v1 after the loop.
; NOTE(review): !1 is defined outside this part of the file; presumably it
; excludes the private address space (per the test file name) - confirm.
6413b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umin_i64_ret_offset(ptr %out, i64 %in) {
6414b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret_offset:
6415b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6416b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6417b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
6418b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
6419b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
6420b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6421b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
6422b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
6423b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
6424b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB110_1: ; %atomicrmw.start
6425b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6426b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6427b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
6428b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
6429b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, v[8:9], v[2:3]
6430b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
6431b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
6432b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
6433b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6434b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6435b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6436b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6437b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6438b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB110_1
6439b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6440b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
6441b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6442b0a25468SMatt Arsenault;
6443b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret_offset:
6444b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6445b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6446b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
6447b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
6448b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
6449b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6450b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
6451b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
6452b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
6453b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB110_1: ; %atomicrmw.start
6454b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6455b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6456b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
6457b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
6458b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, v[8:9], v[2:3]
6459b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
6460b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
6461b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
6462b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6463b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6464b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6465b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6466b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6467b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB110_1
6468b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6469b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
6470b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6471b0a25468SMatt Arsenault;
6472b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret_offset:
6473b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6474b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6475b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
6476b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
6477b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB110_1: ; %atomicrmw.start
6478b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6479b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6480b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
6481b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
6482b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6483b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6484b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6485b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
6486b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6487b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6488b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6489b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6490b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6491b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB110_1
6492b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6493b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
6494b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
6495b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
6496b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Index 4 of an i64 array: this is the source of the 32-byte displacement
  ; that shows up in the checks above.
6497b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
6498b0a25468SMatt Arsenault  %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
6499b0a25468SMatt Arsenault  ret i64 %result
6500b0a25468SMatt Arsenault}
6501b0a25468SMatt Arsenault
; Test: i64 `atomicrmw umin` (seq_cst) on a flat pointer, old value unused,
; using the amdgpu_gfx calling convention with both arguments marked inreg.
; In the checks below the pointer is read from s[4:5] and %in from s[6:7].
; The pointer is copied into v[4:5] for the memory instructions and the two
; halves of %in into v6 (high) and v7 (low) for the selects, while the 64-bit
; compare reads s[6:7] directly.
; The expected code is a compare-and-swap retry loop (%atomicrmw.start) built
; on flat_atomic_cmpswap_x2, with the loop-exit mask accumulated in s[34:35].
; The GFX7 and GFX8 checks read the initial value with two flat_load_dword
; (the high-half address is formed with scalar adds); the GFX9 checks use a
; single flat_load_dwordx2.
; NOTE(review): !1 is defined outside this part of the file; presumably it
; excludes the private address space (per the test file name) - confirm.
6502b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_umin_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
6503b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret_scalar:
6504b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6505b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6506b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6507b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
6508b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6509b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
6510b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s34
6511b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s35
6512b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[0:1]
6513b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[3:4]
6514*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s4
6515b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
6516*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s7
6517*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s6
6518*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s5
6519b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB111_1: ; %atomicrmw.start
6520b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6521b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6522b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
6523*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
6524*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
6525b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
6526b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6527b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6528b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
6529b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
6530b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6531b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
6532b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6533b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB111_1
6534b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6535b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
6536b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6537b0a25468SMatt Arsenault;
6538b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret_scalar:
6539b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6540b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6541b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
6542b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
6543b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
6544b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
6545b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s34
6546b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s35
6547b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[0:1]
6548b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[3:4]
6549*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s4
6550b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
6551*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s7
6552*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s6
6553*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s5
6554b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB111_1: ; %atomicrmw.start
6555b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6556b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6557b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
6558*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
6559*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
6560b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
6561b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6562b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6563b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
6564b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
6565b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6566b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
6567b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6568b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB111_1
6569b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6570b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
6571b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6572b0a25468SMatt Arsenault;
6573b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret_scalar:
6574b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6575b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6576b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
6577b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
6578b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
6579*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
6580b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
6581*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s7
6582*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s6
6583*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
6584b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB111_1: ; %atomicrmw.start
6585b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6586b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6587b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
6588*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
6589*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
6590b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
6591b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6592b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6593b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
6594b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
6595b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6596b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
6597b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6598b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB111_1
6599b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6600b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
6601b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; The old value (%tmp0) is never read; only the memory update matters here.
6602b0a25468SMatt Arsenault  %tmp0 = atomicrmw umin ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
6603b0a25468SMatt Arsenault  ret void
6604b0a25468SMatt Arsenault}
6605b0a25468SMatt Arsenault
; Test: seq_cst `atomicrmw umin` on a flat i64 pointer, result unused, with the
; pointer and the operand both passed `inreg` under the amdgpu_gfx calling
; convention. The address is %out advanced by four i64 elements (32 bytes).
; On all three targets the checks expect a compare-and-swap retry loop built
; around flat_atomic_cmpswap_x2 instead of a single min instruction. The GFX7
; and GFX8 checks compute the +32 / +36 addresses with scalar adds and load the
; two halves separately; the GFX9 checks fold the displacement into offset:32.
; NOTE(review): !1 (noalias.addrspace) is defined outside this chunk; going by
; the file name it presumably excludes the private address space - confirm.
6606b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
6607b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret_offset_scalar:
6608b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6609b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6610b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
6611b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
6612b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
6613b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
6614b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
6615b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
6616b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s34
6617b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v5, s35
6618b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[0:1]
6619b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[4:5]
6620*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
6621*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s7
6622*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s6
6623b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB112_1: ; %atomicrmw.start
6624b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6625b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6626b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
6627*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
6628*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
6629b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
6630b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6631b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6632b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
6633b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
6634*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6635b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
6636*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6637b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB112_1
6638b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6639*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
6640b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6641b0a25468SMatt Arsenault;
6642b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret_offset_scalar:
6643b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6644b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6645b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
6646b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
6647b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
6648b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
6649b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
6650b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
6651b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s34
6652b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v5, s35
6653b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[0:1]
6654b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[4:5]
6655*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
6656*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s7
6657*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s6
6658b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB112_1: ; %atomicrmw.start
6659b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6660b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6661b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
6662*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
6663*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
6664b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
6665b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6666b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6667b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
6668b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
6669*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6670b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
6671*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6672b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB112_1
6673b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6674*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
6675b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6676b0a25468SMatt Arsenault;
6677b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret_offset_scalar:
6678b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6679b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6680b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
6681b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
6682b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1] offset:32
6683*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
6684b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
6685*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s7
6686*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s6
6687*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
6688b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB112_1: ; %atomicrmw.start
6689b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6690b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6691b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[2:3]
6692*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
6693*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
6694b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
6695b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6696b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6697b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
6698b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
6699b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6700b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
6701b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6702b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB112_1
6703b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6704b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
6705b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; Index 4 of an i64 array is the 32-byte displacement seen in the checks above.
6706b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
6707b0a25468SMatt Arsenault  %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
6708b0a25468SMatt Arsenault  ret void
6709b0a25468SMatt Arsenault}
6710b0a25468SMatt Arsenault
; Test: seq_cst `atomicrmw umin` on a flat i64 pointer with no displacement,
; where the atomicrmw result is returned to the caller. Pointer and operand are
; passed `inreg` under the amdgpu_gfx calling convention.
; The checks expect a flat_atomic_cmpswap_x2 retry loop on every target. Each
; iteration copies the previously observed value into v[8:9] for the compare,
; so the cmpswap result can stay in v[0:1]. The GFX7 and GFX8 checks load the
; initial value as two dwords (base and base+4); GFX9 uses one dwordx2 load.
; NOTE(review): !1 (noalias.addrspace) is defined outside this chunk; going by
; the file name it presumably excludes the private address space - confirm.
6711b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_umin_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
6712b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret_scalar:
6713b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6714b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6715b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
6716b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
6717b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
6718b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
6719b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
6720b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
6721b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[0:1]
6722b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[2:3]
6723*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v2, s4
6724b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
6725*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s7
6726*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s6
6727*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v3, s5
6728b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB113_1: ; %atomicrmw.start
6729b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6730b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6731*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v1
6732*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v0
6733*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
6734*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
6735*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
6736*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
6737b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6738b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6739*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6740b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6741b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6742b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB113_1
6743b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6744b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
6745b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6746b0a25468SMatt Arsenault;
6747b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret_scalar:
6748b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6749b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6750b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
6751b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
6752b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
6753b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
6754b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
6755b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
6756b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[0:1]
6757b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[2:3]
6758*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, s4
6759b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
6760*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s7
6761*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s6
6762*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v3, s5
6763b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB113_1: ; %atomicrmw.start
6764b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6765b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6766*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v1
6767*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v0
6768*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
6769*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
6770*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
6771*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
6772b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6773b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6774*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6775b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6776b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6777b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB113_1
6778b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6779b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
6780b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6781b0a25468SMatt Arsenault;
6782b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret_scalar:
6783b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6784b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6785b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
6786b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
6787b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
6788*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
6789b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
6790*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s7
6791*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s6
6792*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
6793b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB113_1: ; %atomicrmw.start
6794b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6795b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6796*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v1
6797*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v0
6798*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
6799*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
6800*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
6801*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
6802b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6803b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6804*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6805b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6806b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6807b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB113_1
6808b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6809b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
6810b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
6811b0a25468SMatt Arsenault  %result = atomicrmw umin ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
6812b0a25468SMatt Arsenault  ret i64 %result
6813b0a25468SMatt Arsenault}
6814b0a25468SMatt Arsenault
; Test: seq_cst `atomicrmw umin` on a flat i64 pointer advanced by four i64
; elements (32 bytes), where the atomicrmw result is returned. Pointer and
; operand are passed `inreg` under the amdgpu_gfx calling convention.
; The checks expect a flat_atomic_cmpswap_x2 retry loop on every target, with
; the previously observed value copied to v[8:9] each iteration and the cmpswap
; result left in v[0:1]. The GFX7 and GFX8 checks form the +32 / +36 addresses
; with scalar adds; the GFX9 checks encode the displacement as offset:32.
; NOTE(review): !1 (noalias.addrspace) is defined outside this chunk; going by
; the file name it presumably excludes the private address space - confirm.
6815b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
6816b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret_offset_scalar:
6817b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6818b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6819b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
6820b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
6821b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
6822b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
6823b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
6824b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
6825b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
6826b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
6827b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
6828b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[2:3]
6829*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
6830*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s7
6831*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s6
6832b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB114_1: ; %atomicrmw.start
6833b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6834b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6835*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v1
6836*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v0
6837*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
6838*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
6839*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
6840*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
6841b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6842b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6843*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6844*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6845*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6846b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB114_1
6847b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6848*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
6849b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6850b0a25468SMatt Arsenault;
6851b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret_offset_scalar:
6852b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6853b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6854b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
6855b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
6856b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
6857b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
6858b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
6859b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
6860b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
6861b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
6862b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
6863b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[2:3]
6864*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
6865*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s7
6866*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s6
6867b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB114_1: ; %atomicrmw.start
6868b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6869b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6870*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v1
6871*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v0
6872*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
6873*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
6874*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
6875*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
6876b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6877b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6878*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6879*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6880*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6881b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB114_1
6882b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6883*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
6884b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6885b0a25468SMatt Arsenault;
6886b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret_offset_scalar:
6887b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6888b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6889b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
6890b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
6891b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] offset:32
6892*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
6893b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
6894*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s7
6895*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s6
6896*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
6897b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB114_1: ; %atomicrmw.start
6898b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6899b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6900*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v1
6901*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v0
6902*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_ge_u64_e32 vcc, s[6:7], v[8:9]
6903*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
6904*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
6905*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc
6906b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6907b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6908*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
6909b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
6910b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
6911b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB114_1
6912b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
6913b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
6914b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; Index 4 of an i64 array is the 32-byte displacement seen in the checks above.
6915b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
6916b0a25468SMatt Arsenault  %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
6917b0a25468SMatt Arsenault  ret i64 %result
6918b0a25468SMatt Arsenault}
6919b0a25468SMatt Arsenault
; Test: seq_cst `atomicrmw umin` on a flat i64 pointer advanced by four i64
; elements (32 bytes), result unused, with !amdgpu.no.remote.memory attached.
; This function uses the default calling convention with no `inreg` arguments;
; the checks address memory through vector registers and compare against
; v[2:3], using s[4:5] as the loop's accumulated exit mask.
; Even with the metadata, the checks on all three targets still expect a
; flat_atomic_cmpswap_x2 retry loop. The GFX7 and GFX8 checks form the +32 /
; +36 addresses with vector adds; the GFX9 checks use offset:32.
; NOTE(review): !0 and !1 are defined outside this chunk; going by the file
; name !1 presumably excludes the private address space - confirm.
6920b0a25468SMatt Arsenaultdefine void @flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
6921b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory:
6922b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
6923b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6924b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
6925b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
6926b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
6927b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6928b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
6929b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
6930b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
6931b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB115_1: ; %atomicrmw.start
6932b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
6933b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6934b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6935b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6936b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6937b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
6938b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6939b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
6940b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
6941b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
6942b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6943b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
6944b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6945b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB115_1
6946b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
6947b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
6948b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
6949b0a25468SMatt Arsenault;
6950b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory:
6951b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
6952b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6953b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
6954b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
6955b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
6956b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
6957b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
6958b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
6959b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
6960b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB115_1: ; %atomicrmw.start
6961b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
6962b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6963b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6964b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6965b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6966b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
6967b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6968b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
6969b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
6970b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
6971b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6972b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
6973b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6974b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB115_1
6975b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
6976b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
6977b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
6978b0a25468SMatt Arsenault;
6979b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory:
6980b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
6981b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6982b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
6983b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
6984b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB115_1: ; %atomicrmw.start
6985b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
6986b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6987b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
6988b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
6989b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
6990b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
6991b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
6992b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
6993b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
6994b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
6995b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
6996b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
6997b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
6998b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB115_1
6999b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7000b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
7001b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
; Index 4 of an i64 array is the 32-byte displacement seen in the checks above.
7002b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
7003b0a25468SMatt Arsenault  %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
7004b0a25468SMatt Arsenault  ret void
7005b0a25468SMatt Arsenault}
7006b0a25468SMatt Arsenault
7007b0a25468SMatt Arsenaultdefine i64 @flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
7008b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory:
7009b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7010b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7011b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
7012b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
7013b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
7014b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7015b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
7016b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
7017b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
7018b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB116_1: ; %atomicrmw.start
7019b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7020b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7021b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
7022b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
7023b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_u64_e32 vcc, v[8:9], v[2:3]
7024b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
7025b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
7026b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
7027b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7028b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7029b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7030b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7031b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7032b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB116_1
7033b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7034b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
7035b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7036b0a25468SMatt Arsenault;
7037b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory:
7038b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7039b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7040b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
7041b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
7042b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
7043b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7044b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
7045b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
7046b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
7047b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB116_1: ; %atomicrmw.start
7048b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7049b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7050b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
7051b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
7052b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_u64_e32 vcc, v[8:9], v[2:3]
7053b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
7054b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
7055b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
7056b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7057b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7058b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7059b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7060b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7061b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB116_1
7062b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7063b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
7064b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7065b0a25468SMatt Arsenault;
7066b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory:
7067b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7068b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7069b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
7070b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
7071b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB116_1: ; %atomicrmw.start
7072b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7073b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7074b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
7075b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
7076b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_u64_e32 vcc, v[6:7], v[2:3]
7077b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7078b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7079b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
7080b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7081b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7082b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7083b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7084b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7085b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB116_1
7086b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7087b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
7088b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
7089b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
7090b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
7091b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
7092b0a25468SMatt Arsenault  %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
7093b0a25468SMatt Arsenault  ret i64 %result
7094b0a25468SMatt Arsenault}
7095b0a25468SMatt Arsenault
7096b0a25468SMatt Arsenault; ---------------------------------------------------------------------
7097b0a25468SMatt Arsenault; atomicrmw min
7098b0a25468SMatt Arsenault; ---------------------------------------------------------------------
7099b0a25468SMatt Arsenault
; Signed 64-bit atomicrmw min (seq_cst) applied directly to the flat pointer
; argument; the old value is discarded (ret void). For every RUN line the
; expected code is a compare-and-swap retry loop built around
; flat_atomic_cmpswap_x2 rather than a single native min instruction.
; The check lines are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
7100b0a25468SMatt Arsenaultdefine void @flat_atomic_min_i64_noret(ptr %ptr, i64 %in) {
7101b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret:
7102b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7103b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7104b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 4, v0
7105b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
7106b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[0:1]
7107b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[4:5]
7108b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
7109b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB117_1: ; %atomicrmw.start
7110b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7111b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7112b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7113b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7114b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7115b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
7116b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7117b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7118b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7119b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
7120b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7121b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
7122b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7123b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB117_1
7124b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7125b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
7126b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7127b0a25468SMatt Arsenault;
7128b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret:
7129b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7130b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7131b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 4, v0
7132b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
7133b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[0:1]
7134b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[4:5]
7135b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
7136b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB117_1: ; %atomicrmw.start
7137b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7138b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7139b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7140b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7141b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7142b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
7143b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7144b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7145b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7146b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
7147b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7148b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
7149b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7150b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB117_1
7151b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7152b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
7153b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7154b0a25468SMatt Arsenault;
7155b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret:
7156b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7157b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7158b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1]
7159b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
7160b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB117_1: ; %atomicrmw.start
7161b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7162b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7163b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7164b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7165b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7166b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
7167b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7168b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7169b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7170b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
7171b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7172b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
7173b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7174b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB117_1
7175b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7176b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
7177b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
7178b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
7179b0a25468SMatt Arsenault  ret void
7180b0a25468SMatt Arsenault}
7181b0a25468SMatt Arsenault
; Signed 64-bit atomicrmw min (seq_cst) at %out plus four i64 elements (a
; 32-byte displacement); the old value is discarded. Every RUN line expects a
; flat_atomic_cmpswap_x2 retry loop. The gfx900 expectations carry the
; displacement in the instruction's offset field, whereas the bonaire and tonga
; expectations add 32 and 36 to the pointer to address the two dword halves of
; the initial load.
; The check lines are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
7182b0a25468SMatt Arsenaultdefine void @flat_atomic_min_i64_noret_offset(ptr %out, i64 %in) {
7183b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret_offset:
7184b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7185b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7186b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
7187b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
7188b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
7189b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7190b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
7191b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
7192b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
7193b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB118_1: ; %atomicrmw.start
7194b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7195b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7196b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7197b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7198b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7199b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
7200b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7201b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7202b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
7203b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
7204b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7205b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
7206b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7207b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB118_1
7208b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7209b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
7210b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7211b0a25468SMatt Arsenault;
7212b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret_offset:
7213b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7214b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7215b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
7216b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
7217b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
7218b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7219b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
7220b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
7221b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
7222b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB118_1: ; %atomicrmw.start
7223b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7224b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7225b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7226b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7227b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7228b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
7229b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7230b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7231b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
7232b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
7233b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7234b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
7235b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7236b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB118_1
7237b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7238b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
7239b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7240b0a25468SMatt Arsenault;
7241b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret_offset:
7242b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7243b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7244b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
7245b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
7246b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB118_1: ; %atomicrmw.start
7247b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7248b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7249b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7250b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7251b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7252b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
7253b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7254b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7255b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7256b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
7257b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7258b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
7259b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7260b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB118_1
7261b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7262b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
7263b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
7264b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
7265b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
7266b0a25468SMatt Arsenault  ret void
7267b0a25468SMatt Arsenault}
7268b0a25468SMatt Arsenault
; Signed 64-bit atomicrmw min (seq_cst) applied directly to the flat pointer
; argument, returning the value that was in memory before the operation. Every
; RUN line expects a flat_atomic_cmpswap_x2 retry loop; after the loop the
; cmpswap result in v[4:5] is copied into v[0:1] before the return.
; The check lines are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
7269b0a25468SMatt Arsenaultdefine i64 @flat_atomic_min_i64_ret(ptr %ptr, i64 %in) {
7270b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret:
7271b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7272b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7273b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v5, vcc, 4, v0
7274b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
7275b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v4, v[0:1]
7276b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v5, v[5:6]
7277b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
7278b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB119_1: ; %atomicrmw.start
7279b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7280b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7281b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v5
7282b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v4
7283b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7284b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7285b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7286b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
7287b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7288b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7289b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7290b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7291b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7292b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB119_1
7293b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7294b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
7295b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, v4
7296b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, v5
7297b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7298b0a25468SMatt Arsenault;
7299b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret:
7300b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7301b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7302b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 4, v0
7303b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
7304b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v4, v[0:1]
7305b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v5, v[5:6]
7306b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
7307b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB119_1: ; %atomicrmw.start
7308b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7309b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7310b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
7311b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
7312b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7313b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7314b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7315b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
7316b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7317b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7318b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7319b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7320b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7321b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB119_1
7322b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7323b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
7324b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, v4
7325b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, v5
7326b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7327b0a25468SMatt Arsenault;
7328b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret:
7329b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7330b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7331b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
7332b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
7333b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB119_1: ; %atomicrmw.start
7334b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7335b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7336b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
7337b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
7338b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7339b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7340b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7341b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
7342b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7343b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7344b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7345b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7346b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7347b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB119_1
7348b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7349b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
7350b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
7351b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
7352b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
7353b0a25468SMatt Arsenault  %result = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
7354b0a25468SMatt Arsenault  ret i64 %result
7355b0a25468SMatt Arsenault}
7356b0a25468SMatt Arsenault
; Signed 64-bit atomicrmw min (seq_cst) at %out plus four i64 elements (a
; 32-byte displacement), returning the value that was in memory before the
; operation. Every RUN line expects a flat_atomic_cmpswap_x2 retry loop. In the
; bonaire and tonga expectations the displaced address is built in v[4:5] and
; the cmpswap writes its result straight into v[0:1]; in the gfx900
; expectations the displacement sits in the instruction's offset field and the
; result is copied from v[4:5] into v[0:1] after the loop.
; The check lines are autogenerated - regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
7357b0a25468SMatt Arsenaultdefine i64 @flat_atomic_min_i64_ret_offset(ptr %out, i64 %in) {
7358b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret_offset:
7359b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7360b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7361b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
7362b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
7363b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
7364b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7365b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
7366b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
7367b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
7368b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB120_1: ; %atomicrmw.start
7369b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7370b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7371b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
7372b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
7373b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, v[8:9], v[2:3]
7374b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
7375b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
7376b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
7377b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7378b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7379b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7380b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7381b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7382b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB120_1
7383b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7384b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
7385b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7386b0a25468SMatt Arsenault;
7387b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret_offset:
7388b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7389b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7390b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
7391b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
7392b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
7393b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
7394b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
7395b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
7396b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
7397b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB120_1: ; %atomicrmw.start
7398b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7399b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7400b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
7401b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
7402b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, v[8:9], v[2:3]
7403b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
7404b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
7405b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
7406b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7407b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7408b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7409b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7410b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7411b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB120_1
7412b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7413b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
7414b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7415b0a25468SMatt Arsenault;
7416b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret_offset:
7417b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7418b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7419b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
7420b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
7421b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB120_1: ; %atomicrmw.start
7422b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7423b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7424b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
7425b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
7426b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
7427b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
7428b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
7429b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
7430b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7431b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7432b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
7433b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
7434b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
7435b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB120_1
7436b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7437b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
7438b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
7439b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
7440b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
7441b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
7442b0a25468SMatt Arsenault  %result = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
7443b0a25468SMatt Arsenault  ret i64 %result
7444b0a25468SMatt Arsenault}
7445b0a25468SMatt Arsenault
7446b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_min_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
7447b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret_scalar:
7448b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7449b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7450b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7451b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
7452b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7453b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
7454b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s34
7455b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s35
7456b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[0:1]
7457b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[3:4]
7458*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s4
7459b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
7460*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s7
7461*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s6
7462*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s5
7463b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB121_1: ; %atomicrmw.start
7464b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7465b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7466b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
7467*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7468*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7469b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
7470b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7471b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7472b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7473b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
7474b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7475b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
7476b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7477b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB121_1
7478b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7479b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
7480b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7481b0a25468SMatt Arsenault;
7482b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret_scalar:
7483b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7484b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7485b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
7486b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
7487b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
7488b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
7489b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s34
7490b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s35
7491b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[0:1]
7492b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[3:4]
7493*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s4
7494b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
7495*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s7
7496*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s6
7497*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s5
7498b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB121_1: ; %atomicrmw.start
7499b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7500b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7501b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
7502*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7503*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7504b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
7505b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7506b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7507b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7508b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
7509b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7510b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
7511b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7512b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB121_1
7513b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7514b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
7515b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7516b0a25468SMatt Arsenault;
7517b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret_scalar:
7518b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7519b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7520b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
7521b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
7522b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
7523*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
7524b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
7525*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s7
7526*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s6
7527*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
7528b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB121_1: ; %atomicrmw.start
7529b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7530b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7531b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
7532*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7533*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7534b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
7535b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7536b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7537b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7538b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
7539b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7540b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
7541b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7542b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB121_1
7543b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7544b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
7545b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
7546b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
7547b0a25468SMatt Arsenault  ret void
7548b0a25468SMatt Arsenault}
7549b0a25468SMatt Arsenault
; Test: seq_cst signed i64 `atomicrmw min` through a flat pointer located
; 4 x i64 (32 bytes) past %out, with the result unused. Both arguments are
; marked `inreg`. The checks expect a flat_atomic_cmpswap_x2 retry loop on
; every target; GFX9 encodes the displacement as `offset:32`, while GFX7/GFX8
; add it to the pointer with s_add_u32/s_addc_u32.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): !1 is defined outside this view - presumably it excludes the
; private address space, as the file name suggests; confirm.
7550b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
7551b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret_offset_scalar:
7552b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7553b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7554b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
7555b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
7556b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
7557b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
7558b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
7559b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
7560b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v4, s34
7561b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v5, s35
7562b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v3, v[0:1]
7563b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v2, v[4:5]
7564*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
7565*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s7
7566*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s6
7567b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB122_1: ; %atomicrmw.start
7568b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7569b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7570b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
7571*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7572*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7573b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
7574b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7575b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7576b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7577b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
7578*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7579b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
7580*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7581b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB122_1
7582b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7583*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
7584b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7585b0a25468SMatt Arsenault;
7586b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret_offset_scalar:
7587b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7588b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7589b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
7590b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
7591b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
7592b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
7593b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
7594b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
7595b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, s34
7596b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v5, s35
7597b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[0:1]
7598b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v2, v[4:5]
7599*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
7600*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s7
7601*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s6
7602b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB122_1: ; %atomicrmw.start
7603b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7604b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7605b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
7606*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7607*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7608b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
7609b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7610b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7611b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7612b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
7613*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7614b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
7615*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7616b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB122_1
7617b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7618*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
7619b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7620b0a25468SMatt Arsenault;
7621b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret_offset_scalar:
7622b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7623b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7624b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
7625b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
7626b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1] offset:32
7627*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s4
7628b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
7629*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s7
7630*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s6
7631*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s5
7632b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB122_1: ; %atomicrmw.start
7633b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7634b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7635b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[2:3]
7636*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7637*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7638b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
7639b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7640b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7641b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7642b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
7643b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7644b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
7645b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7646b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB122_1
7647b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7648b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
7649b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Index 4 of i64 is the 32-byte displacement seen in the checks above.
7650b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
  ; %tmp0 is deliberately unused: this is the "noret" variant.
7651b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
7652b0a25468SMatt Arsenault  ret void
7653b0a25468SMatt Arsenault}
7654b0a25468SMatt Arsenault
; Test: seq_cst signed i64 `atomicrmw min` directly on the flat pointer %ptr,
; returning the value the atomicrmw yields. Both arguments are marked `inreg`.
; The checks expect a flat_atomic_cmpswap_x2 retry loop on every target; each
; iteration copies the previously loaded value into v[8:9] for the compare and
; lets the cmpswap write its result into v[0:1].
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): !1 is defined outside this view - presumably it excludes the
; private address space, as the file name suggests; confirm.
7655b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_min_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
7656b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret_scalar:
7657b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7658b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7659b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s4
7660b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 4
7661b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s5
7662b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
7663b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
7664b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
7665b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[0:1]
7666b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[2:3]
7667*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v2, s4
7668b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[34:35], 0
7669*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s7
7670*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s6
7671*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v3, s5
7672b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB123_1: ; %atomicrmw.start
7673b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7674b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7675*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v1
7676*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v0
7677*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
7678*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
7679*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
7680*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
7681b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7682b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7683*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7684b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7685b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7686b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB123_1
7687b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7688b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
7689b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7690b0a25468SMatt Arsenault;
7691b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret_scalar:
7692b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7693b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7694b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s4
7695b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 4
7696b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s5
7697b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
7698b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
7699b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
7700b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[0:1]
7701b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[2:3]
7702*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, s4
7703b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[34:35], 0
7704*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s7
7705*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s6
7706*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v3, s5
7707b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB123_1: ; %atomicrmw.start
7708b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7709b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7710*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v1
7711*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v0
7712*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
7713*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
7714*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
7715*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
7716b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7717b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7718*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7719b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7720b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7721b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB123_1
7722b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7723b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
7724b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7725b0a25468SMatt Arsenault;
7726b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret_scalar:
7727b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7728b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7729b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
7730b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
7731b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
7732*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
7733b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
7734*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s7
7735*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s6
7736*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
7737b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB123_1: ; %atomicrmw.start
7738b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7739b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7740*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v1
7741*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v0
7742*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
7743*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
7744*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
7745*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
7746b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7747b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7748*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7749b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7750b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7751b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB123_1
7752b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7753b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
7754b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; The atomicrmw result is the function's return value ("ret" variant).
7755b0a25468SMatt Arsenault  %result = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
7756b0a25468SMatt Arsenault  ret i64 %result
7757b0a25468SMatt Arsenault}
7758b0a25468SMatt Arsenault
; Test: seq_cst signed i64 `atomicrmw min` through a flat pointer located
; 4 x i64 (32 bytes) past %out, returning the value the atomicrmw yields.
; Both arguments are marked `inreg`. The checks expect a
; flat_atomic_cmpswap_x2 retry loop on every target; GFX9 encodes the
; displacement as `offset:32`, while GFX7/GFX8 add it to the pointer with
; s_add_u32/s_addc_u32.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): !1 is defined outside this view - presumably it excludes the
; private address space, as the file name suggests; confirm.
7759b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
7760b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret_offset_scalar:
7761b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
7762b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7763b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
7764b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
7765b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s36, s4, 36
7766b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s37, s5, 0
7767b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s36
7768b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s37
7769b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
7770b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
7771b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
7772b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[2:3]
7773*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[34:35], 0
7774*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s7
7775*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s6
7776b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB124_1: ; %atomicrmw.start
7777b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7778b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7779*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v1
7780*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v0
7781*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
7782*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
7783*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
7784*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
7785b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7786b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7787*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7788*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7789*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7790b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB124_1
7791b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7792*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[34:35]
7793b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
7794b0a25468SMatt Arsenault;
7795b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret_offset_scalar:
7796b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
7797b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7798b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
7799b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
7800b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s36, s4, 36
7801b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s37, s5, 0
7802b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s36
7803b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s37
7804b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
7805b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
7806b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
7807b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[2:3]
7808*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[34:35], 0
7809*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s7
7810*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s6
7811b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB124_1: ; %atomicrmw.start
7812b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7813b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7814*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v1
7815*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v0
7816*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
7817*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
7818*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
7819*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] glc
7820b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7821b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7822*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7823*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7824*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7825b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB124_1
7826b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7827*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[34:35]
7828b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
7829b0a25468SMatt Arsenault;
7830b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret_offset_scalar:
7831b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
7832b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7833b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s4
7834b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s5
7835b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[0:1], v[0:1] offset:32
7836*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v2, s4
7837b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[34:35], 0
7838*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s7
7839*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s6
7840*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v3, s5
7841b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB124_1: ; %atomicrmw.start
7842b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7843b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7844*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v1
7845*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v0
7846*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[6:7], v[8:9]
7847*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
7848*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
7849*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[2:3], v[6:9] offset:32 glc
7850b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7851b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7852*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
7853b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[34:35], vcc, s[34:35]
7854b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[34:35]
7855b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB124_1
7856b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7857b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[34:35]
7858b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Index 4 of i64 is the 32-byte displacement seen in the checks above.
7859b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
  ; The atomicrmw result is the function's return value ("ret" variant).
7860b0a25468SMatt Arsenault  %result = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
7861b0a25468SMatt Arsenault  ret i64 %result
7862b0a25468SMatt Arsenault}
7863b0a25468SMatt Arsenault
; Test: kernel-entry variant of the seq_cst signed i64 `atomicrmw min`, with
; the result unused. The address is %out indexed by the runtime value %index
; (i64 elements) plus a constant 4 elements (32 bytes). The checks expect the
; arguments to be fetched with s_load_dword*, %index scaled by 8 via
; s_lshl_b64 ... 3, and a flat_atomic_cmpswap_x2 retry loop that ends in
; s_endpgm. GFX9 encodes the constant part as `offset:32`; GFX7/GFX8 add 32 to
; the pointer with s_add_u32/s_addc_u32.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
; NOTE(review): !1 is defined outside this view - presumably it excludes the
; private address space, as the file name suggests; confirm.
7864b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 %index) {
7865b0a25468SMatt Arsenault; GFX7-LABEL: atomic_min_i64_addr64_offset:
7866b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
78676548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
78686548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
7869b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
78706548b635SShilei Tian; GFX7-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
7871b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s4
7872b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s5
7873b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, 32
7874b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, 0
7875*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s1
7876*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s0
7877*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
7878*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
7879*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s3
7880*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s2
7881b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB125_1: ; %atomicrmw.start
7882b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7883b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7884b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
7885*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7886*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7887b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
7888b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7889b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7890b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7891b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
7892*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
7893b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
7894*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
7895b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB125_1
7896b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
7897b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
7898b0a25468SMatt Arsenault;
7899b0a25468SMatt Arsenault; GFX8-LABEL: atomic_min_i64_addr64_offset:
7900b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
79016548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
79026548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7903b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
79046548b635SShilei Tian; GFX8-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
7905b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s4
7906b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s5
7907b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, 32
7908b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, 0
7909*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s1
7910*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s0
7911*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[4:5]
7912*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
7913*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s3
7914*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s2
7915b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB125_1: ; %atomicrmw.start
7916b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
7917b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7918b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
7919*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7920*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7921b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
7922b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7923b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
7924b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7925b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
7926*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
7927b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
7928*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
7929b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB125_1
7930b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
7931b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
7932b0a25468SMatt Arsenault;
7933b0a25468SMatt Arsenault; GFX9-LABEL: atomic_min_i64_addr64_offset:
7934b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
79356548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
79366548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
7937b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
79386548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[4:5], s[6:7], 3
79396548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s0, s4
79406548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s1, s5
7941*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s1
7942*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s0
7943*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[4:5] offset:32
7944*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
7945*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s3
7946*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s2
7947b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB125_1: ; %atomicrmw.start
7948b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
7949b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
79506548b635SShilei Tian; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
7951*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
7952*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
7953b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] offset:32 glc
7954b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7955b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
7956b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
7957b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
7958*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
7959b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
7960*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
7961b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB125_1
7962b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
7963b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
7964b0a25468SMatt Arsenaultentry:
  ; Runtime-indexed element of %out (the s_lshl_b64 by 3 in the checks above).
7965b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index
  ; Plus a constant 4 x i64 = 32 bytes.
7966b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %ptr, i64 4
  ; %tmp0 is deliberately unused: this variant does not return the old value.
7967b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
7968b0a25468SMatt Arsenault  ret void
7969b0a25468SMatt Arsenault}
7970b0a25468SMatt Arsenault
; Kernel test: signed 64-bit `atomicrmw min` (seq_cst, no syncscope given) on
; the flat pointer %out[%index + 4], with the returned old value stored to %out2.
; The checks below expect a compare-and-swap retry loop (%atomicrmw.start)
; built around flat_atomic_cmpswap_x2 rather than a single min instruction.
; On the gfx7 and gfx8 runs the 32-byte offset is added to the address with
; scalar adds; on the gfx9 run it is folded into the memory instructions as
; offset:32.
; NOTE(review): !1 is defined outside this chunk; going by the file name it
; presumably excludes the private address space - confirm.
7971b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2, i64 %in, i64 %index) {
7972b0a25468SMatt Arsenault; GFX7-LABEL: atomic_min_i64_ret_addr64_offset:
7973b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
79746548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
7975b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
7976b0a25468SMatt Arsenault; GFX7-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
7977b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s6
7978b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s7
7979b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, 32
7980b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, 0
7981b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s0
7982b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s1
7983*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
7984*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
7985*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s5
7986*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s4
7987b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB126_1: ; %atomicrmw.start
7988b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
7989b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7990*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v3
7991*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v2
7992*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
7993*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
7994*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
7995*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
7996b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
7997b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
7998*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
7999*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
8000*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
8001b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB126_1
8002b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
8003*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[0:1]
8004*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v0, s2
8005*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v1, s3
8006*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
8007b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
8008b0a25468SMatt Arsenault;
8009b0a25468SMatt Arsenault; GFX8-LABEL: atomic_min_i64_ret_addr64_offset:
8010b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
80116548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
8012b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
8013b0a25468SMatt Arsenault; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
8014b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s6
8015b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s7
8016b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, 32
8017b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, 0
8018b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s0
8019b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s1
8020*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
8021*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
8022*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s5
8023*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s4
8024b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB126_1: ; %atomicrmw.start
8025b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
8026b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8027*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v3
8028*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v2
8029*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
8030*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
8031*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
8032*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
8033b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8034b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8035*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
8036*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
8037*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
8038b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB126_1
8039b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
8040*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[0:1]
8041*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v0, s2
8042*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, s3
8043*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
8044b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
8045b0a25468SMatt Arsenault;
8046b0a25468SMatt Arsenault; GFX9-LABEL: atomic_min_i64_ret_addr64_offset:
8047b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
80486548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
8049b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
80506548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[0:1], s[14:15], 3
80516548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s8, s0
80526548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s9, s1
8053b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s0
8054b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s1
8055*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1] offset:32
8056*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
8057*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s13
8058*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s12
8059b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB126_1: ; %atomicrmw.start
8060b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
8061b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8062*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v3
8063*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v2
8064*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
8065*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
8066*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
8067*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] offset:32 glc
8068b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8069b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8070*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
8071*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
8072*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
8073b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB126_1
8074b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
8075*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 exec, exec, s[0:1]
8076*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v0, s10
8077*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v1, s11
8078*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
8079b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
8080b0a25468SMatt Arsenaultentry:
8081b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index
8082b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %ptr, i64 4
8083b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
8084b0a25468SMatt Arsenault  store i64 %tmp0, ptr %out2
8085b0a25468SMatt Arsenault  ret void
8086b0a25468SMatt Arsenault}
8087b0a25468SMatt Arsenault
; Kernel test: signed 64-bit `atomicrmw min` (seq_cst, no syncscope given)
; directly on the flat pointer %out, with no index and no constant offset.
; The result %tmp0 is unused, so the expected code ends the retry loop with
; s_endpgm and emits no store. All three runs expect the same shape: an
; initial flat_load_dwordx2 followed by a flat_atomic_cmpswap_x2 loop
; (%atomicrmw.start) that repeats until the compare-and-swap succeeds.
; NOTE(review): !1 is defined outside this chunk; going by the file name it
; presumably excludes the private address space - confirm.
8088b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_min_i64(ptr %out, i64 %in) {
8089b0a25468SMatt Arsenault; GFX7-LABEL: atomic_min_i64:
8090b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
80916548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
8092b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
8093b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8094b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s0
8095b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s1
8096b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
8097*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s1
8098*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v6, s3
8099*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v7, s2
8100*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s0
8101b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB127_1: ; %atomicrmw.start
8102b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
8103b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8104b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
8105*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
8106*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
8107b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
8108b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8109b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8110b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
8111b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, v1
8112b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8113b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, v0
8114b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8115b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB127_1
8116b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
8117b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
8118b0a25468SMatt Arsenault;
8119b0a25468SMatt Arsenault; GFX8-LABEL: atomic_min_i64:
8120b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
81216548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
8122b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
8123b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
8124b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s0
8125b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s1
8126b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
8127*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s1
8128*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s3
8129*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, s2
8130*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s0
8131b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB127_1: ; %atomicrmw.start
8132b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
8133b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8134b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
8135*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
8136*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
8137b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
8138b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8139b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8140b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
8141b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v1
8142b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8143b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, v0
8144b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8145b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB127_1
8146b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
8147b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
8148b0a25468SMatt Arsenault;
8149b0a25468SMatt Arsenault; GFX9-LABEL: atomic_min_i64:
8150b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
81516548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
81526548b635SShilei Tian; GFX9-NEXT:    s_mov_b64 s[4:5], 0
8153b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
81546548b635SShilei Tian; GFX9-NEXT:    v_mov_b32_e32 v0, s0
81556548b635SShilei Tian; GFX9-NEXT:    v_mov_b32_e32 v1, s1
8156b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
8157*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s1
8158*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v6, s3
8159*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v7, s2
8160*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s0
8161b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB127_1: ; %atomicrmw.start
8162b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
8163b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
81646548b635SShilei Tian; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
8165*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v1, v6, v3, vcc
8166*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v0, v7, v2, vcc
8167b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
8168b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8169b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8170b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
8171b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, v1
81726548b635SShilei Tian; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8173b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, v0
81746548b635SShilei Tian; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8175b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB127_1
8176b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
8177b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
8178b0a25468SMatt Arsenaultentry:
8179b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %out, i64 %in seq_cst, !noalias.addrspace !1
8180b0a25468SMatt Arsenault  ret void
8181b0a25468SMatt Arsenault}
8182b0a25468SMatt Arsenault
; Kernel test: signed 64-bit `atomicrmw min` (seq_cst, no syncscope given) on
; the flat pointer %out[%index] with no constant offset; the returned old
; value is stored to %out2. The checks below expect a compare-and-swap retry
; loop (%atomicrmw.start) built around flat_atomic_cmpswap_x2, followed by a
; flat_store_dwordx2 of the loop result once the loop exits.
; The atomicrmw carries a single !noalias.addrspace attachment, consistent
; with the other tests in this file.
; NOTE(review): !1 is defined outside this chunk; going by the file name it
; presumably excludes the private address space - confirm.
8183b0a25468SMatt Arsenaultdefine amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %in, i64 %index) {
8184b0a25468SMatt Arsenault; GFX7-LABEL: atomic_min_i64_ret_addr64:
8185b0a25468SMatt Arsenault; GFX7:       ; %bb.0: ; %entry
81866548b635SShilei Tian; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
8187b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
8188b0a25468SMatt Arsenault; GFX7-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
8189b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s0, s0, s6
8190b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s1, s1, s7
8191b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s0
8192b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s1
8193*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
8194*eeac0ffaSNikita Popov; GFX7-NEXT:    s_mov_b64 s[0:1], 0
8195*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v4, s5
8196*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v5, s4
8197b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB128_1: ; %atomicrmw.start
8198b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
8199b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8200*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v9, v3
8201*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v8, v2
8202*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
8203*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
8204*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
8205*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
8206b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8207b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8208*eeac0ffaSNikita Popov; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
8209*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
8210*eeac0ffaSNikita Popov; GFX7-NEXT:    s_andn2_b64 exec, exec, s[0:1]
8211b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB128_1
8212b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
8213*eeac0ffaSNikita Popov; GFX7-NEXT:    s_or_b64 exec, exec, s[0:1]
8214*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v0, s2
8215*eeac0ffaSNikita Popov; GFX7-NEXT:    v_mov_b32_e32 v1, s3
8216*eeac0ffaSNikita Popov; GFX7-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
8217b0a25468SMatt Arsenault; GFX7-NEXT:    s_endpgm
8218b0a25468SMatt Arsenault;
8219b0a25468SMatt Arsenault; GFX8-LABEL: atomic_min_i64_ret_addr64:
8220b0a25468SMatt Arsenault; GFX8:       ; %bb.0: ; %entry
82216548b635SShilei Tian; GFX8-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
8222b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
8223b0a25468SMatt Arsenault; GFX8-NEXT:    s_lshl_b64 s[6:7], s[6:7], 3
8224b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s0, s0, s6
8225b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s1, s1, s7
8226b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s0
8227b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s1
8228*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
8229*eeac0ffaSNikita Popov; GFX8-NEXT:    s_mov_b64 s[0:1], 0
8230*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, s5
8231*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, s4
8232b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB128_1: ; %atomicrmw.start
8233b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
8234b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8235*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v3
8236*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v2
8237*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_ge_i64_e32 vcc, s[4:5], v[8:9]
8238*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
8239*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
8240*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
8241b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8242b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8243*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
8244*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
8245*eeac0ffaSNikita Popov; GFX8-NEXT:    s_andn2_b64 exec, exec, s[0:1]
8246b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB128_1
8247b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
8248*eeac0ffaSNikita Popov; GFX8-NEXT:    s_or_b64 exec, exec, s[0:1]
8249*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v0, s2
8250*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, s3
8251*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
8252b0a25468SMatt Arsenault; GFX8-NEXT:    s_endpgm
8253b0a25468SMatt Arsenault;
8254b0a25468SMatt Arsenault; GFX9-LABEL: atomic_min_i64_ret_addr64:
8255b0a25468SMatt Arsenault; GFX9:       ; %bb.0: ; %entry
82566548b635SShilei Tian; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
8257b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
82586548b635SShilei Tian; GFX9-NEXT:    s_lshl_b64 s[0:1], s[14:15], 3
82596548b635SShilei Tian; GFX9-NEXT:    s_add_u32 s0, s8, s0
82606548b635SShilei Tian; GFX9-NEXT:    s_addc_u32 s1, s9, s1
8261b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s0
8262b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s1
8263*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
8264*eeac0ffaSNikita Popov; GFX9-NEXT:    s_mov_b64 s[0:1], 0
8265*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v4, s13
8266*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v5, s12
8267b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB128_1: ; %atomicrmw.start
8268b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
8269b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8270*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v9, v3
8271*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v8, v2
8272*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_ge_i64_e32 vcc, s[12:13], v[8:9]
8273*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v7, v4, v9, vcc
8274*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cndmask_b32_e32 v6, v5, v8, vcc
8275*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[6:9] glc
8276b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8277b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8278*eeac0ffaSNikita Popov; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
8279*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
8280*eeac0ffaSNikita Popov; GFX9-NEXT:    s_andn2_b64 exec, exec, s[0:1]
8281b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB128_1
8282b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
8283*eeac0ffaSNikita Popov; GFX9-NEXT:    s_or_b64 exec, exec, s[0:1]
8284*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v0, s10
8285*eeac0ffaSNikita Popov; GFX9-NEXT:    v_mov_b32_e32 v1, s11
8286*eeac0ffaSNikita Popov; GFX9-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
8287b0a25468SMatt Arsenault; GFX9-NEXT:    s_endpgm
8288b0a25468SMatt Arsenaultentry:
8289b0a25468SMatt Arsenault  %ptr = getelementptr i64, ptr %out, i64 %index
8290b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
8291b0a25468SMatt Arsenault  store i64 %tmp0, ptr %out2
8292b0a25468SMatt Arsenault  ret void
8293b0a25468SMatt Arsenault}
8294b0a25468SMatt Arsenault
; Non-kernel function test: signed 64-bit `atomicrmw min` (seq_cst, no
; syncscope given) on %out + 4 x i64, tagged with both
; !amdgpu.no.remote.memory and !noalias.addrspace. The result is unused.
; The checks below still expect a flat_atomic_cmpswap_x2 retry loop
; (%atomicrmw.start) on all three runs, ending in s_setpc_b64 to return.
; On the gfx7 and gfx8 runs the initial value is read with two flat_load_dword
; instructions from addresses +32 and +36; on the gfx9 run a single
; flat_load_dwordx2 with offset:32 is used.
; NOTE(review): !0 and !1 are defined outside this chunk - confirm their
; contents there.
8295b0a25468SMatt Arsenaultdefine void @flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
8296b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory:
8297b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8298b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8299b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v8, vcc, 32, v0
8300b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
8301b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
8302b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8303b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v7, v[0:1]
8304b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v6, v[8:9]
8305b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
8306b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB129_1: ; %atomicrmw.start
8307b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
8308b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8309b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
8310b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
8311b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
8312b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
8313b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8314b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8315b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
8316b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v7, v1
8317b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8318b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v6, v0
8319b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8320b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB129_1
8321b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
8322b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
8323b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8324b0a25468SMatt Arsenault;
8325b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory:
8326b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8327b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8328b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 32, v0
8329b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v9, vcc, 0, v1, vcc
8330b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
8331b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8332b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v7, v[0:1]
8333b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v6, v[8:9]
8334b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
8335b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB129_1: ; %atomicrmw.start
8336b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
8337b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8338b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
8339b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
8340b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
8341b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[8:9], v[4:7] glc
8342b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8343b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8344b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[6:7]
8345b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v1
8346b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8347b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v0
8348b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8349b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB129_1
8350b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
8351b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
8352b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8353b0a25468SMatt Arsenault;
8354b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory:
8355b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8356b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8357b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[6:7], v[0:1] offset:32
8358b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
8359b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB129_1: ; %atomicrmw.start
8360b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
8361b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8362b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
8363b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
8364b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
8365b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
8366b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8367b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8368b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
8369b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
8370b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8371b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
8372b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8373b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB129_1
8374b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
8375b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
8376b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8377b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8378b0a25468SMatt Arsenault  %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
8379b0a25468SMatt Arsenault  ret void
8380b0a25468SMatt Arsenault}
8381b0a25468SMatt Arsenault
8382b0a25468SMatt Arsenaultdefine i64 @flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
8383b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory:
8384b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8385b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8386b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v4, vcc, 32, v0
8387b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
8388b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 36, v0
8389b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8390b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v1, v[0:1]
8391b0a25468SMatt Arsenault; GFX7-NEXT:    flat_load_dword v0, v[4:5]
8392b0a25468SMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
8393b0a25468SMatt Arsenault; GFX7-NEXT:  .LBB130_1: ; %atomicrmw.start
8394b0a25468SMatt Arsenault; GFX7-NEXT:    ; =>This Inner Loop Header: Depth=1
8395b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8396b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v9, v1
8397b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v8, v0
8398b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_le_i64_e32 vcc, v[8:9], v[2:3]
8399b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
8400b0a25468SMatt Arsenault; GFX7-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
8401b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
8402b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8403b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8404b0a25468SMatt Arsenault; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
8405b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8406b0a25468SMatt Arsenault; GFX7-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8407b0a25468SMatt Arsenault; GFX7-NEXT:    s_cbranch_execnz .LBB130_1
8408b0a25468SMatt Arsenault; GFX7-NEXT:  ; %bb.2: ; %atomicrmw.end
8409b0a25468SMatt Arsenault; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
8410b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8411b0a25468SMatt Arsenault;
8412b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory:
8413b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8414b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8415b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 32, v0
8416b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v5, vcc, 0, v1, vcc
8417b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 36, v0
8418b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8419b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v1, v[0:1]
8420b0a25468SMatt Arsenault; GFX8-NEXT:    flat_load_dword v0, v[4:5]
8421b0a25468SMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
8422b0a25468SMatt Arsenault; GFX8-NEXT:  .LBB130_1: ; %atomicrmw.start
8423b0a25468SMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
8424b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8425b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v9, v1
8426b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v8, v0
8427b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_le_i64_e32 vcc, v[8:9], v[2:3]
8428b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v7, v3, v9, vcc
8429b0a25468SMatt Arsenault; GFX8-NEXT:    v_cndmask_b32_e32 v6, v2, v8, vcc
8430b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[6:9] glc
8431b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8432b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8433b0a25468SMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[8:9]
8434b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8435b0a25468SMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8436b0a25468SMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB130_1
8437b0a25468SMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
8438b0a25468SMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
8439b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8440b0a25468SMatt Arsenault;
8441b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory:
8442b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8443b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8444b0a25468SMatt Arsenault; GFX9-NEXT:    flat_load_dwordx2 v[4:5], v[0:1] offset:32
8445b0a25468SMatt Arsenault; GFX9-NEXT:    s_mov_b64 s[4:5], 0
8446b0a25468SMatt Arsenault; GFX9-NEXT:  .LBB130_1: ; %atomicrmw.start
8447b0a25468SMatt Arsenault; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
8448b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8449b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v7, v5
8450b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v6, v4
8451b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_le_i64_e32 vcc, v[6:7], v[2:3]
8452b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v7, vcc
8453b0a25468SMatt Arsenault; GFX9-NEXT:    v_cndmask_b32_e32 v4, v2, v6, vcc
8454b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] offset:32 glc
8455b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8456b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8457b0a25468SMatt Arsenault; GFX9-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
8458b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
8459b0a25468SMatt Arsenault; GFX9-NEXT:    s_andn2_b64 exec, exec, s[4:5]
8460b0a25468SMatt Arsenault; GFX9-NEXT:    s_cbranch_execnz .LBB130_1
8461b0a25468SMatt Arsenault; GFX9-NEXT:  ; %bb.2: ; %atomicrmw.end
8462b0a25468SMatt Arsenault; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
8463b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, v4
8464b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, v5
8465b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8466b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8467b0a25468SMatt Arsenault  %result = atomicrmw min ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
8468b0a25468SMatt Arsenault  ret i64 %result
8469b0a25468SMatt Arsenault}
8470b0a25468SMatt Arsenault
8471b0a25468SMatt Arsenault; ---------------------------------------------------------------------
8472b0a25468SMatt Arsenault; atomicrmw uinc_wrap
8473b0a25468SMatt Arsenault; ---------------------------------------------------------------------
8474b0a25468SMatt Arsenault
; Non-returning seq_cst uinc_wrap of an i64 through a flat pointer; the result
; (%tmp0) is unused. All three targets are expected to emit a single
; flat_atomic_inc_x2 with the address in v[0:1] and the operand in v[2:3],
; followed by s_waitcnt and buffer_wbinvl1_vol.
8475b0a25468SMatt Arsenaultdefine void @flat_atomic_uinc_wrap_i64_noret(ptr %ptr, i64 %in) {
8476b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret:
8477b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8478b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8479b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3]
8480b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8481b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8482b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8483b0a25468SMatt Arsenault;
8484b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret:
8485b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8486b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8487b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3]
8488b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8489b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8490b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8491b0a25468SMatt Arsenault;
8492b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret:
8493b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8494b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8495b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3]
8496b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8497b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8498b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8499b0a25468SMatt Arsenault  %tmp0 = atomicrmw uinc_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
8500b0a25468SMatt Arsenault  ret void
8501b0a25468SMatt Arsenault}
8502b0a25468SMatt Arsenault
; Non-returning seq_cst uinc_wrap at %out + 4 i64 elements (32 bytes).
; The GFX7 and GFX8 checks add 32 to the address pair with a v_add / v_addc
; sequence before the atomic, while the GFX9 checks expect the 32 to be folded
; into the instruction's offset field.
8503b0a25468SMatt Arsenaultdefine void @flat_atomic_uinc_wrap_i64_noret_offset(ptr %out, i64 %in) {
8504b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret_offset:
8505b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8506b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8507b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
8508b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8509b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3]
8510b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8511b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8512b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8513b0a25468SMatt Arsenault;
8514b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret_offset:
8515b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8516b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8517b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
8518b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8519b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3]
8520b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8521b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8522b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8523b0a25468SMatt Arsenault;
8524b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret_offset:
8525b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8526b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8527b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3] offset:32
8528b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8529b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8530b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8531b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8532b0a25468SMatt Arsenault  %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
8533b0a25468SMatt Arsenault  ret void
8534b0a25468SMatt Arsenault}
8535b0a25468SMatt Arsenault
; Returning seq_cst uinc_wrap of an i64 through a flat pointer; the atomicrmw
; result is the function's return value. The checks expect the glc form of
; flat_atomic_inc_x2 with its result written to v[0:1], the same register pair
; that held the incoming address.
8536b0a25468SMatt Arsenaultdefine i64 @flat_atomic_uinc_wrap_i64_ret(ptr %ptr, i64 %in) {
8537b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret:
8538b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8539b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8540b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
8541b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8542b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8543b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8544b0a25468SMatt Arsenault;
8545b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret:
8546b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8547b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8548b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
8549b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8550b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8551b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8552b0a25468SMatt Arsenault;
8553b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret:
8554b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8555b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8556b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
8557b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8558b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8559b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8560b0a25468SMatt Arsenault  %result = atomicrmw uinc_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
8561b0a25468SMatt Arsenault  ret i64 %result
8562b0a25468SMatt Arsenault}
8563b0a25468SMatt Arsenault
; Returning seq_cst uinc_wrap at %out + 4 i64 elements (32 bytes).
; The GFX7 and GFX8 checks materialize the +32 with v_add / v_addc before the
; glc atomic; the GFX9 checks expect the 32 in the instruction's offset field.
8564b0a25468SMatt Arsenaultdefine i64 @flat_atomic_uinc_wrap_i64_ret_offset(ptr %out, i64 %in) {
8565b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret_offset:
8566b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8567b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8568b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
8569b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8570b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
8571b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8572b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8573b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8574b0a25468SMatt Arsenault;
8575b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret_offset:
8576b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8577b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8578b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
8579b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8580b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
8581b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8582b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8583b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8584b0a25468SMatt Arsenault;
8585b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret_offset:
8586b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8587b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8588b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
8589b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8590b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8591b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8592b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8593b0a25468SMatt Arsenault  %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
8594b0a25468SMatt Arsenault  ret i64 %result
8595b0a25468SMatt Arsenault}
8596b0a25468SMatt Arsenault
; Non-returning seq_cst uinc_wrap with both arguments marked inreg under the
; amdgpu_gfx calling convention. In the expected output the pointer arrives in
; s[4:5] and the operand in s[6:7]; both are copied into VGPRs with v_mov_b32
; before the flat atomic.
8597b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
8598b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret_scalar:
8599b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8600b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8601b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8602b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8603b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
8604b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
8605b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[2:3], v[0:1]
8606b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8607b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8608b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8609b0a25468SMatt Arsenault;
8610b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret_scalar:
8611b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8612b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8613b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
8614b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
8615b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
8616b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
8617b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[2:3], v[0:1]
8618b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8619b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8620b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8621b0a25468SMatt Arsenault;
8622b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret_scalar:
8623b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8624b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8625b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
8626b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
8627b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
8628b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
8629b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[2:3], v[0:1]
8630b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8631b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8632b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8633b0a25468SMatt Arsenault  %tmp0 = atomicrmw uinc_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
8634b0a25468SMatt Arsenault  ret void
8635b0a25468SMatt Arsenault}
8636b0a25468SMatt Arsenault
; Non-returning seq_cst uinc_wrap at %out + 32 bytes with inreg (SGPR)
; arguments under amdgpu_gfx. The GFX7 and GFX8 checks compute the address
; with scalar s_add_u32 / s_addc_u32 into s[34:35] before copying it to VGPRs;
; the GFX9 checks copy s[4:5] unchanged and use the instruction's offset field.
8637b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
8638b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret_offset_scalar:
8639b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8640b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8641b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
8642b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
8643b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
8644b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8645b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8646b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
8647b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[2:3], v[0:1]
8648b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8649b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8650b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8651b0a25468SMatt Arsenault;
8652b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret_offset_scalar:
8653b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8654b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8655b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
8656b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
8657b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
8658b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
8659b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
8660b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
8661b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[2:3], v[0:1]
8662b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8663b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8664b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8665b0a25468SMatt Arsenault;
8666b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret_offset_scalar:
8667b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8668b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8669b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
8670b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
8671b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
8672b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
8673b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[2:3], v[0:1] offset:32
8674b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8675b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8676b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8677b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8678b0a25468SMatt Arsenault  %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
8679b0a25468SMatt Arsenault  ret void
8680b0a25468SMatt Arsenault}
8681b0a25468SMatt Arsenault
; Returning seq_cst uinc_wrap with inreg (SGPR) arguments under amdgpu_gfx.
; The checks copy the operand from s[6:7] into v[0:1] and the pointer from
; s[4:5] into v[2:3], then expect the glc atomic to overwrite v[0:1] with the
; returned value.
8682b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
8683b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret_scalar:
8684b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8685b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8686b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8687b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8688b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
8689b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
8690b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
8691b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8692b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8693b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8694b0a25468SMatt Arsenault;
8695b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret_scalar:
8696b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8697b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8698b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
8699b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
8700b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
8701b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
8702b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
8703b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8704b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8705b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8706b0a25468SMatt Arsenault;
8707b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret_scalar:
8708b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8709b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8710b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
8711b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
8712b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
8713b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
8714b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
8715b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8716b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8717b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8718b0a25468SMatt Arsenault  %result = atomicrmw uinc_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
8719b0a25468SMatt Arsenault  ret i64 %result
8720b0a25468SMatt Arsenault}
8721b0a25468SMatt Arsenault
; Returning seq_cst uinc_wrap at %out + 32 bytes with inreg (SGPR) arguments
; under amdgpu_gfx. The GFX7 and GFX8 checks form the address with scalar
; s_add_u32 / s_addc_u32 into s[34:35]; the GFX9 checks use offset:32 on the
; glc atomic instead. The result is expected in v[0:1] on all targets.
8722b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
8723b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret_offset_scalar:
8724b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8725b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8726b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
8727b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
8728b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
8729b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8730b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8731b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
8732b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
8733b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8734b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8735b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8736b0a25468SMatt Arsenault;
8737b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret_offset_scalar:
8738b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8739b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8740b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
8741b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
8742b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
8743b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
8744b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
8745b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
8746b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] glc
8747b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8748b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8749b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8750b0a25468SMatt Arsenault;
8751b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret_offset_scalar:
8752b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8753b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8754b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
8755b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
8756b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
8757b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
8758b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
8759b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8760b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8761b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8762b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8763b0a25468SMatt Arsenault  %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
8764b0a25468SMatt Arsenault  ret i64 %result
8765b0a25468SMatt Arsenault}
8766b0a25468SMatt Arsenault
; Same operation as @flat_atomic_uinc_wrap_i64_noret_offset, with
; !amdgpu.no.remote.memory !0 additionally attached to the atomicrmw (the
; metadata nodes are defined elsewhere in this file). The expected
; instructions are identical to the variant without that metadata: a single
; flat_atomic_inc_x2 and no compare-and-swap loop.
8767b0a25468SMatt Arsenaultdefine void @flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
8768b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory:
8769b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8770b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8771b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
8772b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8773b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3]
8774b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8775b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8776b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8777b0a25468SMatt Arsenault;
8778b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory:
8779b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8780b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8781b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
8782b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8783b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3]
8784b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8785b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8786b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8787b0a25468SMatt Arsenault;
8788b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory:
8789b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8790b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8791b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[0:1], v[2:3] offset:32
8792b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8793b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8794b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8795b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8796b0a25468SMatt Arsenault  %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
8797b0a25468SMatt Arsenault  ret void
8798b0a25468SMatt Arsenault}
8799b0a25468SMatt Arsenault
; Same operation as @flat_atomic_uinc_wrap_i64_ret_offset, with
; !amdgpu.no.remote.memory !0 additionally attached to the atomicrmw (the
; metadata nodes are defined elsewhere in this file). The expected
; instructions are identical to the variant without that metadata: a single
; glc flat_atomic_inc_x2 returning in v[0:1] and no compare-and-swap loop.
8800b0a25468SMatt Arsenaultdefine i64 @flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
8801b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory:
8802b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8803b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8804b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
8805b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8806b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
8807b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8808b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8809b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8810b0a25468SMatt Arsenault;
8811b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory:
8812b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8813b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8814b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
8815b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8816b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] glc
8817b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8818b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8819b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8820b0a25468SMatt Arsenault;
8821b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory:
8822b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8823b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8824b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_inc_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
8825b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8826b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8827b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8828b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8829b0a25468SMatt Arsenault  %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
8830b0a25468SMatt Arsenault  ret i64 %result
8831b0a25468SMatt Arsenault}
8832b0a25468SMatt Arsenault
8833b0a25468SMatt Arsenault; ---------------------------------------------------------------------
8834b0a25468SMatt Arsenault; atomicrmw udec_wrap
8835b0a25468SMatt Arsenault; ---------------------------------------------------------------------
8836b0a25468SMatt Arsenault
; Non-returning seq_cst udec_wrap of an i64 through a flat pointer; the result
; (%tmp0) is unused. All three targets are expected to emit a single
; flat_atomic_dec_x2 with the address in v[0:1] and the operand in v[2:3],
; followed by s_waitcnt and buffer_wbinvl1_vol.
8837b0a25468SMatt Arsenaultdefine void @flat_atomic_udec_wrap_i64_noret(ptr %ptr, i64 %in) {
8838b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret:
8839b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8840b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8841b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3]
8842b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8843b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8844b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8845b0a25468SMatt Arsenault;
8846b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret:
8847b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8848b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8849b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3]
8850b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8851b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8852b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8853b0a25468SMatt Arsenault;
8854b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret:
8855b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8856b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8857b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3]
8858b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8859b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8860b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8861b0a25468SMatt Arsenault  %tmp0 = atomicrmw udec_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
8862b0a25468SMatt Arsenault  ret void
8863b0a25468SMatt Arsenault}
8864b0a25468SMatt Arsenault
; Non-returning seq_cst udec_wrap at %out + 4 i64 elements (32 bytes).
; The GFX7 and GFX8 checks add 32 to the address pair with a v_add / v_addc
; sequence before the atomic, while the GFX9 checks expect the 32 to be folded
; into the instruction's offset field.
8865b0a25468SMatt Arsenaultdefine void @flat_atomic_udec_wrap_i64_noret_offset(ptr %out, i64 %in) {
8866b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret_offset:
8867b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8868b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8869b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
8870b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8871b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3]
8872b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8873b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8874b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8875b0a25468SMatt Arsenault;
8876b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret_offset:
8877b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8878b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8879b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
8880b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8881b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3]
8882b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8883b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8884b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8885b0a25468SMatt Arsenault;
8886b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret_offset:
8887b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8888b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8889b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3] offset:32
8890b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8891b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8892b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8893b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8894b0a25468SMatt Arsenault  %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
8895b0a25468SMatt Arsenault  ret void
8896b0a25468SMatt Arsenault}
8897b0a25468SMatt Arsenault
; Returning seq_cst udec_wrap of an i64 through a flat pointer; the atomicrmw
; result is the function's return value. The checks expect the glc form of
; flat_atomic_dec_x2 with its result written to v[0:1], the same register pair
; that held the incoming address.
8898b0a25468SMatt Arsenaultdefine i64 @flat_atomic_udec_wrap_i64_ret(ptr %ptr, i64 %in) {
8899b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret:
8900b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8901b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8902b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
8903b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8904b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8905b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8906b0a25468SMatt Arsenault;
8907b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret:
8908b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8909b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8910b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
8911b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8912b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8913b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8914b0a25468SMatt Arsenault;
8915b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret:
8916b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8917b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8918b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
8919b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8920b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8921b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8922b0a25468SMatt Arsenault  %result = atomicrmw udec_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
8923b0a25468SMatt Arsenault  ret i64 %result
8924b0a25468SMatt Arsenault}
8925b0a25468SMatt Arsenault
; udec_wrap, returning form, on element 4 of %out (getelementptr i64, which the
; expected code treats as a 32-byte displacement).
; The GFX7 and GFX8 checks expect the displacement to be added to the address
; in v[0:1] with a v_add/v_addc pair before the atomic; the GFX9 checks expect
; it to be folded into the instruction as "offset:32".
; In every case the atomic has the glc modifier and destination v[0:1].
8926b0a25468SMatt Arsenaultdefine i64 @flat_atomic_udec_wrap_i64_ret_offset(ptr %out, i64 %in) {
8927b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret_offset:
8928b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8929b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8930b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
8931b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8932b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
8933b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8934b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8935b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8936b0a25468SMatt Arsenault;
8937b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret_offset:
8938b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8939b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8940b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
8941b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
8942b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
8943b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8944b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8945b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8946b0a25468SMatt Arsenault;
8947b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret_offset:
8948b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8949b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8950b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
8951b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8952b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8953b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8954b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
8955b0a25468SMatt Arsenault  %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
8956b0a25468SMatt Arsenault  ret i64 %result
8957b0a25468SMatt Arsenault}
8958b0a25468SMatt Arsenault
; udec_wrap, non-returning form (%tmp0 is never used), amdgpu_gfx calling
; convention with both arguments marked inreg.
; The checks show the pointer being read from s[4:5] and the operand from
; s[6:7]; each half is copied into a VGPR (v[2:3] = address, v[0:1] = data)
; before being used as the operands of flat_atomic_dec_x2. The expected
; instruction has no destination register and no glc modifier, and the
; expected sequence is the same on all three targets.
8959b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_scalar(ptr inreg %ptr, i64 inreg %in) {
8960b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret_scalar:
8961b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
8962b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8963b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
8964b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
8965b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
8966b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
8967b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[2:3], v[0:1]
8968b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8969b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
8970b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
8971b0a25468SMatt Arsenault;
8972b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret_scalar:
8973b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
8974b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8975b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
8976b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
8977b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
8978b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
8979b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[2:3], v[0:1]
8980b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8981b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
8982b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
8983b0a25468SMatt Arsenault;
8984b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret_scalar:
8985b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
8986b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8987b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
8988b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
8989b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
8990b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
8991b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[2:3], v[0:1]
8992b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
8993b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
8994b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
8995b0a25468SMatt Arsenault  %tmp0 = atomicrmw udec_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
8996b0a25468SMatt Arsenault  ret void
8997b0a25468SMatt Arsenault}
8998b0a25468SMatt Arsenault
; udec_wrap, non-returning form, amdgpu_gfx with inreg arguments, on element 4
; of %out (a 32-byte displacement in the expected code).
; The GFX7 and GFX8 checks expect the displacement to be added on the scalar
; side (s_add_u32/s_addc_u32 from s[4:5] into s[34:35]) and the sum then copied
; to v[2:3]. The GFX9 checks expect s[4:5] to be copied unchanged and the
; displacement carried by the instruction as "offset:32".
; The data operand is copied from s[6:7] into v[0:1] on every target.
8999b0a25468SMatt Arsenaultdefine amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_offset_scalar(ptr inreg %out, i64 inreg %in) {
9000b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret_offset_scalar:
9001b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
9002b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9003b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
9004b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
9005b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
9006b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9007b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9008b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
9009b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[2:3], v[0:1]
9010b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9011b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
9012b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
9013b0a25468SMatt Arsenault;
9014b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret_offset_scalar:
9015b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
9016b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9017b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
9018b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
9019b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
9020b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
9021b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
9022b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
9023b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[2:3], v[0:1]
9024b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9025b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
9026b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
9027b0a25468SMatt Arsenault;
9028b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret_offset_scalar:
9029b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
9030b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9031b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
9032b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
9033b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
9034b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
9035b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[2:3], v[0:1] offset:32
9036b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9037b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
9038b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
9039b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
9040b0a25468SMatt Arsenault  %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
9041b0a25468SMatt Arsenault  ret void
9042b0a25468SMatt Arsenault}
9043b0a25468SMatt Arsenault
; udec_wrap, returning form, amdgpu_gfx calling convention with both arguments
; marked inreg.
; The checks show s[6:7] (operand) copied to v[0:1] and s[4:5] (pointer) copied
; to v[2:3]; the atomic then uses v[2:3] as address, v[0:1] as data, and also
; v[0:1] as its destination, with the glc modifier. The expected sequence is
; the same on all three targets.
9044b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_scalar(ptr inreg %ptr, i64 inreg %in) {
9045b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret_scalar:
9046b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
9047b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9048b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9049b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9050b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s4
9051b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s5
9052b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc
9053b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9054b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
9055b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
9056b0a25468SMatt Arsenault;
9057b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret_scalar:
9058b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
9059b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9060b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
9061b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
9062b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s4
9063b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s5
9064b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc
9065b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9066b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
9067b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
9068b0a25468SMatt Arsenault;
9069b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret_scalar:
9070b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
9071b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9072b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
9073b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
9074b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
9075b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
9076b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc
9077b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9078b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
9079b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
9080b0a25468SMatt Arsenault  %result = atomicrmw udec_wrap ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1
9081b0a25468SMatt Arsenault  ret i64 %result
9082b0a25468SMatt Arsenault}
9083b0a25468SMatt Arsenault
; udec_wrap, returning form, amdgpu_gfx with inreg arguments, on element 4 of
; %out (a 32-byte displacement in the expected code).
; The GFX7 and GFX8 checks expect a scalar 64-bit add (s_add_u32/s_addc_u32
; from s[4:5] into s[34:35]) whose result is copied to v[2:3]. The GFX9 checks
; expect s[4:5] copied unchanged and "offset:32" on the instruction instead.
; On every target the atomic has the glc modifier and writes v[0:1].
9084b0a25468SMatt Arsenaultdefine amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_offset_scalar(ptr inreg %out, i64 inreg %in) {
9085b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret_offset_scalar:
9086b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
9087b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9088b0a25468SMatt Arsenault; GFX7-NEXT:    s_add_u32 s34, s4, 32
9089b0a25468SMatt Arsenault; GFX7-NEXT:    s_addc_u32 s35, s5, 0
9090b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v2, s34
9091b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, s6
9092b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, s7
9093b0a25468SMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v3, s35
9094b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc
9095b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9096b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
9097b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
9098b0a25468SMatt Arsenault;
9099b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret_offset_scalar:
9100b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
9101b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9102b0a25468SMatt Arsenault; GFX8-NEXT:    s_add_u32 s34, s4, 32
9103b0a25468SMatt Arsenault; GFX8-NEXT:    s_addc_u32 s35, s5, 0
9104b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v2, s34
9105b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, s6
9106b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, s7
9107b0a25468SMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, s35
9108b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] glc
9109b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9110b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
9111b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
9112b0a25468SMatt Arsenault;
9113b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret_offset_scalar:
9114b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
9115b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9116b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v0, s6
9117b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v1, s7
9118b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v2, s4
9119b0a25468SMatt Arsenault; GFX9-NEXT:    v_mov_b32_e32 v3, s5
9120b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3], v[0:1] offset:32 glc
9121b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9122b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
9123b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
9124b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
9125b0a25468SMatt Arsenault  %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
9126b0a25468SMatt Arsenault  ret i64 %result
9127b0a25468SMatt Arsenault}
9128b0a25468SMatt Arsenault
; udec_wrap, non-returning form, on element 4 of %out, with
; !amdgpu.no.remote.memory !0 attached to the atomicrmw in addition to
; !noalias.addrspace !1.
; The checks still expect one flat_atomic_dec_x2 without glc. The GFX7 and
; GFX8 checks expect a v_add/v_addc pair adding 32 to the address first; the
; GFX9 checks expect "offset:32" on the instruction.
9129b0a25468SMatt Arsenaultdefine void @flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
9130b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory:
9131b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
9132b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9133b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
9134b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
9135b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3]
9136b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9137b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
9138b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
9139b0a25468SMatt Arsenault;
9140b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory:
9141b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
9142b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9143b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
9144b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
9145b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3]
9146b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9147b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
9148b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
9149b0a25468SMatt Arsenault;
9150b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory:
9151b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
9152b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9153b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[0:1], v[2:3] offset:32
9154b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9155b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
9156b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
9157b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
9158b0a25468SMatt Arsenault  %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
9159b0a25468SMatt Arsenault  ret void
9160b0a25468SMatt Arsenault}
9161b0a25468SMatt Arsenault
; udec_wrap, returning form, on element 4 of %out, with
; !amdgpu.no.remote.memory !0 attached to the atomicrmw in addition to
; !noalias.addrspace !1.
; The checks expect one flat_atomic_dec_x2 with the glc modifier and
; destination v[0:1]. The GFX7 and GFX8 checks expect a v_add/v_addc pair
; adding 32 to the address first; the GFX9 checks expect "offset:32" on the
; instruction.
9162b0a25468SMatt Arsenaultdefine i64 @flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i64 %in) {
9163b0a25468SMatt Arsenault; GFX7-LABEL: flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory:
9164b0a25468SMatt Arsenault; GFX7:       ; %bb.0:
9165b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9166b0a25468SMatt Arsenault; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 32, v0
9167b0a25468SMatt Arsenault; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
9168b0a25468SMatt Arsenault; GFX7-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
9169b0a25468SMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9170b0a25468SMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1_vol
9171b0a25468SMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
9172b0a25468SMatt Arsenault;
9173b0a25468SMatt Arsenault; GFX8-LABEL: flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory:
9174b0a25468SMatt Arsenault; GFX8:       ; %bb.0:
9175b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9176b0a25468SMatt Arsenault; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 32, v0
9177b0a25468SMatt Arsenault; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
9178b0a25468SMatt Arsenault; GFX8-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc
9179b0a25468SMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9180b0a25468SMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1_vol
9181b0a25468SMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
9182b0a25468SMatt Arsenault;
9183b0a25468SMatt Arsenault; GFX9-LABEL: flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory:
9184b0a25468SMatt Arsenault; GFX9:       ; %bb.0:
9185b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9186b0a25468SMatt Arsenault; GFX9-NEXT:    flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] offset:32 glc
9187b0a25468SMatt Arsenault; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
9188b0a25468SMatt Arsenault; GFX9-NEXT:    buffer_wbinvl1_vol
9189b0a25468SMatt Arsenault; GFX9-NEXT:    s_setpc_b64 s[30:31]
9190b0a25468SMatt Arsenault  %gep = getelementptr i64, ptr %out, i64 4
9191b0a25468SMatt Arsenault  %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
9192b0a25468SMatt Arsenault  ret i64 %result
9193b0a25468SMatt Arsenault}
9194b0a25468SMatt Arsenault
; Empty metadata node; it is the operand of !amdgpu.no.remote.memory on the
; two *__amdgpu_no_remote_memory tests above.
9195b0a25468SMatt Arsenault!0 = !{}
; Operand of !noalias.addrspace on the atomicrmw instructions above. Per the
; LLVM LangRef the pair is a half-open range [5, 6), so it excludes address
; space 5, which is the private address space on AMDGPU.
9196b0a25468SMatt Arsenault!1 = !{i32 5, i32 6}
9197