xref: /llvm-project/llvm/test/CodeGen/AMDGPU/private-memory-atomics.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -passes=atomic-expand < %s | FileCheck -check-prefix=IR %s
3; RUN: llc -mtriple=amdgcn-- -mcpu=tahiti < %s | FileCheck -check-prefix=GCN %s
4
; seq_cst atomic load of an i32 through an addrspace(5) pointer.
; The atomic-expand checks expect the load to become a plain (non-atomic)
; `load i32`; the llc checks expect a single buffer_load_dword followed by
; an s_waitcnt vmcnt(0) before returning.
5define i32 @load_atomic_private_seq_cst_i32(ptr addrspace(5) %ptr) {
6; IR-LABEL: define i32 @load_atomic_private_seq_cst_i32(
7; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
8; IR-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
9; IR-NEXT:    ret i32 [[LOAD]]
10;
11; GCN-LABEL: load_atomic_private_seq_cst_i32:
12; GCN:       ; %bb.0:
13; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
15; GCN-NEXT:    s_waitcnt vmcnt(0)
16; GCN-NEXT:    s_setpc_b64 s[30:31]
17  %load = load atomic i32, ptr addrspace(5) %ptr seq_cst, align 4
18  ret i32 %load
19}
20
; seq_cst atomic load of an i64 through an addrspace(5) pointer.
; The atomic-expand checks expect a plain `load i64` with align 8; the llc
; checks expect two buffer_load_dword instructions, the second one using the
; pointer plus 4 as its address.
21define i64 @load_atomic_private_seq_cst_i64(ptr addrspace(5) %ptr) {
22; IR-LABEL: define i64 @load_atomic_private_seq_cst_i64(
23; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
24; IR-NEXT:    [[LOAD:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 8
25; IR-NEXT:    ret i64 [[LOAD]]
26;
27; GCN-LABEL: load_atomic_private_seq_cst_i64:
28; GCN:       ; %bb.0:
29; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; GCN-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
31; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
32; GCN-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 offen
33; GCN-NEXT:    s_waitcnt vmcnt(0)
34; GCN-NEXT:    s_setpc_b64 s[30:31]
35  %load = load atomic i64, ptr addrspace(5) %ptr seq_cst, align 8
36  ret i64 %load
37}
38
; seq_cst atomic store of an i32 through an addrspace(5) pointer.
; The atomic-expand checks expect a plain (non-atomic) `store i32`; the llc
; checks expect a single buffer_store_dword.
39define void @atomic_store_seq_cst_i32(ptr addrspace(5) %ptr, i32 %val) {
40; IR-LABEL: define void @atomic_store_seq_cst_i32(
41; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
42; IR-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[PTR]], align 4
43; IR-NEXT:    ret void
44;
45; GCN-LABEL: atomic_store_seq_cst_i32:
46; GCN:       ; %bb.0:
47; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
49; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
50; GCN-NEXT:    s_setpc_b64 s[30:31]
51  store atomic i32 %val, ptr addrspace(5) %ptr seq_cst, align 4
52  ret void
53}
54
; seq_cst atomic store of an i64 through an addrspace(5) pointer.
; The atomic-expand checks expect a plain `store i64` with align 8; the llc
; checks expect two buffer_store_dword instructions, one of them addressed at
; the pointer plus 4.
55define void @atomic_store_seq_cst_i64(ptr addrspace(5) %ptr, i64 %val) {
56; IR-LABEL: define void @atomic_store_seq_cst_i64(
57; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i64 [[VAL:%.*]]) #[[ATTR0]] {
58; IR-NEXT:    store i64 [[VAL]], ptr addrspace(5) [[PTR]], align 8
59; IR-NEXT:    ret void
60;
61; GCN-LABEL: atomic_store_seq_cst_i64:
62; GCN:       ; %bb.0:
63; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GCN-NEXT:    v_add_i32_e32 v3, vcc, 4, v0
65; GCN-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
66; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
67; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
68; GCN-NEXT:    s_setpc_b64 s[30:31]
69  store atomic i64 %val, ptr addrspace(5) %ptr seq_cst, align 8
70  ret void
71}
72
; seq_cst atomic i32 load with an explicit syncscope("agent") through an
; addrspace(5) pointer. The atomic-expand checks expect a plain `load i32`
; with no syncscope or ordering left; the llc checks expect a single
; buffer_load_dword.
73define i32 @load_atomic_private_seq_cst_syncscope_i32(ptr addrspace(5) %ptr) {
74; IR-LABEL: define i32 @load_atomic_private_seq_cst_syncscope_i32(
75; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
76; IR-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
77; IR-NEXT:    ret i32 [[LOAD]]
78;
79; GCN-LABEL: load_atomic_private_seq_cst_syncscope_i32:
80; GCN:       ; %bb.0:
81; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
82; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
83; GCN-NEXT:    s_waitcnt vmcnt(0)
84; GCN-NEXT:    s_setpc_b64 s[30:31]
85  %load = load atomic i32, ptr addrspace(5) %ptr syncscope("agent") seq_cst, align 4
86  ret i32 %load
87}
88
; seq_cst atomic i32 store with an explicit syncscope("agent") through an
; addrspace(5) pointer. The atomic-expand checks expect a plain `store i32`
; with no syncscope or ordering left; the llc checks expect a single
; buffer_store_dword.
89define void @atomic_store_seq_cst_syncscope_i32(ptr addrspace(5) %ptr, i32 %val) {
90; IR-LABEL: define void @atomic_store_seq_cst_syncscope_i32(
91; IR-SAME: ptr addrspace(5) [[PTR:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
92; IR-NEXT:    store i32 [[VAL]], ptr addrspace(5) [[PTR]], align 4
93; IR-NEXT:    ret void
94;
95; GCN-LABEL: atomic_store_seq_cst_syncscope_i32:
96; GCN:       ; %bb.0:
97; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
99; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
100; GCN-NEXT:    s_setpc_b64 s[30:31]
101  store atomic i32 %val, ptr addrspace(5) %ptr syncscope("agent") seq_cst, align 4
102  ret void
103}
104
; i32 cmpxchg (expected 0, replacement 1, acq_rel / monotonic) on an
; addrspace(5) pointer. The atomic-expand checks expect the operation to be
; rewritten as load, icmp eq, select, store, with the { i32, i1 } result
; rebuilt through insertvalue. The function returns the loaded value and
; stores the success bit to a poison addrspace(1) pointer (presumably so the
; i1 result stays used in the generated code).
105define i32 @cmpxchg_private_i32(ptr addrspace(5) %ptr) {
106; IR-LABEL: define i32 @cmpxchg_private_i32(
107; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
108; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
109; IR-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
110; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 1, i32 [[TMP1]]
111; IR-NEXT:    store i32 [[TMP3]], ptr addrspace(5) [[PTR]], align 4
112; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP1]], 0
113; IR-NEXT:    [[TMP5:%.*]] = insertvalue { i32, i1 } [[TMP4]], i1 [[TMP2]], 1
114; IR-NEXT:    [[RESULT_0:%.*]] = extractvalue { i32, i1 } [[TMP5]], 0
115; IR-NEXT:    [[RESULT_1:%.*]] = extractvalue { i32, i1 } [[TMP5]], 1
116; IR-NEXT:    store i1 [[RESULT_1]], ptr addrspace(1) poison, align 1
117; IR-NEXT:    ret i32 [[RESULT_0]]
118;
119; GCN-LABEL: cmpxchg_private_i32:
120; GCN:       ; %bb.0:
121; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
123; GCN-NEXT:    s_mov_b32 s7, 0xf000
124; GCN-NEXT:    s_mov_b32 s6, -1
125; GCN-NEXT:    s_waitcnt vmcnt(0)
126; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
127; GCN-NEXT:    v_cndmask_b32_e64 v2, v1, 1, vcc
128; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
129; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
130; GCN-NEXT:    buffer_store_byte v0, off, s[4:7], 0
131; GCN-NEXT:    s_waitcnt expcnt(0)
132; GCN-NEXT:    v_mov_b32_e32 v0, v1
133; GCN-NEXT:    s_waitcnt vmcnt(0)
134; GCN-NEXT:    s_setpc_b64 s[30:31]
135  %result = cmpxchg ptr addrspace(5) %ptr, i32 0, i32 1 acq_rel monotonic
136  %result.0 = extractvalue { i32, i1 } %result, 0
137  %result.1 = extractvalue { i32, i1 } %result, 1
138  store i1 %result.1, ptr addrspace(1) poison
139  ret i32 %result.0
140}
141
; i64 cmpxchg (expected 0, replacement 1, acq_rel / monotonic) on an
; addrspace(5) pointer. The atomic-expand checks expect load, icmp eq,
; select, store on i64 with the { i64, i1 } result rebuilt through
; insertvalue. The llc checks expect the 64-bit access to be done as two
; dword loads and two dword stores, with a 64-bit compare against 0.
; The success bit is stored to a poison addrspace(1) pointer (presumably so
; the i1 result stays used in the generated code).
142define i64 @cmpxchg_private_i64(ptr addrspace(5) %ptr) {
143; IR-LABEL: define i64 @cmpxchg_private_i64(
144; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
145; IR-NEXT:    [[TMP1:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 8
146; IR-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
147; IR-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i64 1, i64 [[TMP1]]
148; IR-NEXT:    store i64 [[TMP3]], ptr addrspace(5) [[PTR]], align 8
149; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i64, i1 } poison, i64 [[TMP1]], 0
150; IR-NEXT:    [[TMP5:%.*]] = insertvalue { i64, i1 } [[TMP4]], i1 [[TMP2]], 1
151; IR-NEXT:    [[RESULT_0:%.*]] = extractvalue { i64, i1 } [[TMP5]], 0
152; IR-NEXT:    [[RESULT_1:%.*]] = extractvalue { i64, i1 } [[TMP5]], 1
153; IR-NEXT:    store i1 [[RESULT_1]], ptr addrspace(1) poison, align 1
154; IR-NEXT:    ret i64 [[RESULT_0]]
155;
156; GCN-LABEL: cmpxchg_private_i64:
157; GCN:       ; %bb.0:
158; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159; GCN-NEXT:    v_mov_b32_e32 v2, v0
160; GCN-NEXT:    v_add_i32_e32 v3, vcc, 4, v2
161; GCN-NEXT:    buffer_load_dword v1, v3, s[0:3], 0 offen
162; GCN-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
163; GCN-NEXT:    s_mov_b32 s7, 0xf000
164; GCN-NEXT:    s_mov_b32 s6, -1
165; GCN-NEXT:    s_waitcnt vmcnt(0)
166; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[0:1]
167; GCN-NEXT:    v_cndmask_b32_e64 v4, v1, 0, vcc
168; GCN-NEXT:    buffer_store_dword v4, v3, s[0:3], 0 offen
169; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, 1, vcc
170; GCN-NEXT:    s_waitcnt expcnt(0)
171; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
172; GCN-NEXT:    buffer_store_dword v3, v2, s[0:3], 0 offen
173; GCN-NEXT:    buffer_store_byte v4, off, s[4:7], 0
174; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
175; GCN-NEXT:    s_setpc_b64 s[30:31]
176  %result = cmpxchg ptr addrspace(5) %ptr, i64 0, i64 1 acq_rel monotonic
177  %result.0 = extractvalue { i64, i1 } %result, 0
178  %result.1 = extractvalue { i64, i1 } %result, 1
179  store i1 %result.1, ptr addrspace(1) poison
180  ret i64 %result.0
181}
182
183
; seq_cst `atomicrmw xchg` of the constant 4 on an addrspace(5) i32 pointer.
; The atomic-expand checks expect a plain load of the old value followed by
; a plain `store i32 4`; the old value is returned. The llc checks expect one
; buffer_load_dword and one buffer_store_dword.
184define i32 @atomicrmw_xchg_private_i32(ptr addrspace(5) %ptr) {
185; IR-LABEL: define i32 @atomicrmw_xchg_private_i32(
186; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
187; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
188; IR-NEXT:    store i32 4, ptr addrspace(5) [[PTR]], align 4
189; IR-NEXT:    ret i32 [[TMP1]]
190;
191; GCN-LABEL: atomicrmw_xchg_private_i32:
192; GCN:       ; %bb.0:
193; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
195; GCN-NEXT:    v_mov_b32_e32 v2, 4
196; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
197; GCN-NEXT:    s_waitcnt vmcnt(1)
198; GCN-NEXT:    v_mov_b32_e32 v0, v1
199; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
200; GCN-NEXT:    s_setpc_b64 s[30:31]
201  %result = atomicrmw xchg ptr addrspace(5) %ptr, i32 4 seq_cst
202  ret i32 %result
203}
204
; seq_cst `atomicrmw add` of the constant 4 on an addrspace(5) i32 pointer.
; The atomic-expand checks expect plain load, `add i32 ..., 4`, plain store,
; returning the loaded (old) value. The llc checks expect a load, a
; v_add_i32 with immediate 4, and a store.
205define i32 @atomicrmw_add_private_i32(ptr addrspace(5) %ptr) {
206; IR-LABEL: define i32 @atomicrmw_add_private_i32(
207; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
208; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
209; IR-NEXT:    [[NEW:%.*]] = add i32 [[TMP1]], 4
210; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
211; IR-NEXT:    ret i32 [[TMP1]]
212;
213; GCN-LABEL: atomicrmw_add_private_i32:
214; GCN:       ; %bb.0:
215; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
217; GCN-NEXT:    s_waitcnt vmcnt(0)
218; GCN-NEXT:    v_add_i32_e32 v2, vcc, 4, v1
219; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
220; GCN-NEXT:    v_mov_b32_e32 v0, v1
221; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
222; GCN-NEXT:    s_setpc_b64 s[30:31]
223  %result = atomicrmw add ptr addrspace(5) %ptr, i32 4 seq_cst
224  ret i32 %result
225}
226
; seq_cst `atomicrmw sub` of the constant 4 on an addrspace(5) i32 pointer.
; The atomic-expand checks expect plain load, `sub i32 ..., 4`, plain store,
; returning the loaded (old) value. The llc checks expect the subtraction to
; be emitted as v_add_i32 with immediate -4.
227define i32 @atomicrmw_sub_private_i32(ptr addrspace(5) %ptr) {
228; IR-LABEL: define i32 @atomicrmw_sub_private_i32(
229; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
230; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
231; IR-NEXT:    [[NEW:%.*]] = sub i32 [[TMP1]], 4
232; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
233; IR-NEXT:    ret i32 [[TMP1]]
234;
235; GCN-LABEL: atomicrmw_sub_private_i32:
236; GCN:       ; %bb.0:
237; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
239; GCN-NEXT:    s_waitcnt vmcnt(0)
240; GCN-NEXT:    v_add_i32_e32 v2, vcc, -4, v1
241; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
242; GCN-NEXT:    v_mov_b32_e32 v0, v1
243; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
244; GCN-NEXT:    s_setpc_b64 s[30:31]
245  %result = atomicrmw sub ptr addrspace(5) %ptr, i32 4 seq_cst
246  ret i32 %result
247}
248
; seq_cst `atomicrmw and` with the constant 4 on an addrspace(5) i32 pointer.
; The atomic-expand checks expect plain load, `and i32 ..., 4`, plain store,
; returning the loaded (old) value. The llc checks expect a v_and_b32 between
; the load and the store.
249define i32 @atomicrmw_and_private_i32(ptr addrspace(5) %ptr) {
250; IR-LABEL: define i32 @atomicrmw_and_private_i32(
251; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
252; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
253; IR-NEXT:    [[NEW:%.*]] = and i32 [[TMP1]], 4
254; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
255; IR-NEXT:    ret i32 [[TMP1]]
256;
257; GCN-LABEL: atomicrmw_and_private_i32:
258; GCN:       ; %bb.0:
259; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
261; GCN-NEXT:    s_waitcnt vmcnt(0)
262; GCN-NEXT:    v_and_b32_e32 v2, 4, v1
263; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
264; GCN-NEXT:    v_mov_b32_e32 v0, v1
265; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
266; GCN-NEXT:    s_setpc_b64 s[30:31]
267  %result = atomicrmw and ptr addrspace(5) %ptr, i32 4 seq_cst
268  ret i32 %result
269}
270
; seq_cst `atomicrmw nand` with the constant 4 on an addrspace(5) i32 pointer.
; The atomic-expand checks expect plain load, `and ..., 4` then `xor ..., -1`,
; plain store, returning the loaded (old) value. The llc checks expect the
; same value computed as v_not_b32 of the loaded value followed by v_or_b32
; with -5 (the bitwise complement of 4).
271define i32 @atomicrmw_nand_private_i32(ptr addrspace(5) %ptr) {
272; IR-LABEL: define i32 @atomicrmw_nand_private_i32(
273; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
274; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
275; IR-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 4
276; IR-NEXT:    [[NEW:%.*]] = xor i32 [[TMP2]], -1
277; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
278; IR-NEXT:    ret i32 [[TMP1]]
279;
280; GCN-LABEL: atomicrmw_nand_private_i32:
281; GCN:       ; %bb.0:
282; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
284; GCN-NEXT:    s_waitcnt vmcnt(0)
285; GCN-NEXT:    v_not_b32_e32 v2, v1
286; GCN-NEXT:    v_or_b32_e32 v2, -5, v2
287; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
288; GCN-NEXT:    v_mov_b32_e32 v0, v1
289; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
290; GCN-NEXT:    s_setpc_b64 s[30:31]
291  %result = atomicrmw nand ptr addrspace(5) %ptr, i32 4 seq_cst
292  ret i32 %result
293}
294
; seq_cst `atomicrmw or` with the constant 4 on an addrspace(5) i32 pointer.
; The atomic-expand checks expect plain load, `or i32 ..., 4`, plain store,
; returning the loaded (old) value. The llc checks expect a v_or_b32 between
; the load and the store.
295define i32 @atomicrmw_or_private_i32(ptr addrspace(5) %ptr) {
296; IR-LABEL: define i32 @atomicrmw_or_private_i32(
297; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
298; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
299; IR-NEXT:    [[NEW:%.*]] = or i32 [[TMP1]], 4
300; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
301; IR-NEXT:    ret i32 [[TMP1]]
302;
303; GCN-LABEL: atomicrmw_or_private_i32:
304; GCN:       ; %bb.0:
305; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
307; GCN-NEXT:    s_waitcnt vmcnt(0)
308; GCN-NEXT:    v_or_b32_e32 v2, 4, v1
309; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
310; GCN-NEXT:    v_mov_b32_e32 v0, v1
311; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
312; GCN-NEXT:    s_setpc_b64 s[30:31]
313  %result = atomicrmw or ptr addrspace(5) %ptr, i32 4 seq_cst
314  ret i32 %result
315}
316
; seq_cst `atomicrmw xor` with the constant 4 on an addrspace(5) i32 pointer.
; The atomic-expand checks expect plain load, `xor i32 ..., 4`, plain store,
; returning the loaded (old) value. The llc checks expect a v_xor_b32 between
; the load and the store.
317define i32 @atomicrmw_xor_private_i32(ptr addrspace(5) %ptr) {
318; IR-LABEL: define i32 @atomicrmw_xor_private_i32(
319; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
320; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
321; IR-NEXT:    [[NEW:%.*]] = xor i32 [[TMP1]], 4
322; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
323; IR-NEXT:    ret i32 [[TMP1]]
324;
325; GCN-LABEL: atomicrmw_xor_private_i32:
326; GCN:       ; %bb.0:
327; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
329; GCN-NEXT:    s_waitcnt vmcnt(0)
330; GCN-NEXT:    v_xor_b32_e32 v2, 4, v1
331; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
332; GCN-NEXT:    v_mov_b32_e32 v0, v1
333; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
334; GCN-NEXT:    s_setpc_b64 s[30:31]
335  %result = atomicrmw xor ptr addrspace(5) %ptr, i32 4 seq_cst
336  ret i32 %result
337}
338
; seq_cst `atomicrmw max` (signed) with the constant 4 on an addrspace(5)
; i32 pointer. The atomic-expand checks expect plain load, `icmp sgt` plus
; `select`, plain store, returning the loaded (old) value. The llc checks
; expect the compare/select pair to be emitted as a single v_max_i32.
339define i32 @atomicrmw_max_private_i32(ptr addrspace(5) %ptr) {
340; IR-LABEL: define i32 @atomicrmw_max_private_i32(
341; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
342; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
343; IR-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP1]], 4
344; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
345; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
346; IR-NEXT:    ret i32 [[TMP1]]
347;
348; GCN-LABEL: atomicrmw_max_private_i32:
349; GCN:       ; %bb.0:
350; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
352; GCN-NEXT:    s_waitcnt vmcnt(0)
353; GCN-NEXT:    v_max_i32_e32 v2, 4, v1
354; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
355; GCN-NEXT:    v_mov_b32_e32 v0, v1
356; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
357; GCN-NEXT:    s_setpc_b64 s[30:31]
358  %result = atomicrmw max ptr addrspace(5) %ptr, i32 4 seq_cst
359  ret i32 %result
360}
361
; seq_cst `atomicrmw min` (signed) with the constant 4 on an addrspace(5)
; i32 pointer. The atomic-expand checks expect plain load, `icmp sle` plus
; `select`, plain store, returning the loaded (old) value. The llc checks
; expect the compare/select pair to be emitted as a single v_min_i32.
362define i32 @atomicrmw_min_private_i32(ptr addrspace(5) %ptr) {
363; IR-LABEL: define i32 @atomicrmw_min_private_i32(
364; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
365; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
366; IR-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 4
367; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
368; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
369; IR-NEXT:    ret i32 [[TMP1]]
370;
371; GCN-LABEL: atomicrmw_min_private_i32:
372; GCN:       ; %bb.0:
373; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
375; GCN-NEXT:    s_waitcnt vmcnt(0)
376; GCN-NEXT:    v_min_i32_e32 v2, 4, v1
377; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
378; GCN-NEXT:    v_mov_b32_e32 v0, v1
379; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
380; GCN-NEXT:    s_setpc_b64 s[30:31]
381  %result = atomicrmw min ptr addrspace(5) %ptr, i32 4 seq_cst
382  ret i32 %result
383}
384
; seq_cst `atomicrmw umax` (unsigned) with the constant 4 on an addrspace(5)
; i32 pointer. The atomic-expand checks expect plain load, `icmp ugt` plus
; `select`, plain store, returning the loaded (old) value. The llc checks
; expect the compare/select pair to be emitted as a single v_max_u32.
385define i32 @atomicrmw_umax_private_i32(ptr addrspace(5) %ptr) {
386; IR-LABEL: define i32 @atomicrmw_umax_private_i32(
387; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
388; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
389; IR-NEXT:    [[TMP2:%.*]] = icmp ugt i32 [[TMP1]], 4
390; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
391; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
392; IR-NEXT:    ret i32 [[TMP1]]
393;
394; GCN-LABEL: atomicrmw_umax_private_i32:
395; GCN:       ; %bb.0:
396; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
398; GCN-NEXT:    s_waitcnt vmcnt(0)
399; GCN-NEXT:    v_max_u32_e32 v2, 4, v1
400; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
401; GCN-NEXT:    v_mov_b32_e32 v0, v1
402; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
403; GCN-NEXT:    s_setpc_b64 s[30:31]
404  %result = atomicrmw umax ptr addrspace(5) %ptr, i32 4 seq_cst
405  ret i32 %result
406}
407
; seq_cst `atomicrmw umin` (unsigned) with the constant 4 on an addrspace(5)
; i32 pointer. The atomic-expand checks expect plain load, `icmp ule` plus
; `select`, plain store, returning the loaded (old) value. The llc checks
; expect the compare/select pair to be emitted as a single v_min_u32.
408define i32 @atomicrmw_umin_private_i32(ptr addrspace(5) %ptr) {
409; IR-LABEL: define i32 @atomicrmw_umin_private_i32(
410; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
411; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
412; IR-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[TMP1]], 4
413; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP2]], i32 [[TMP1]], i32 4
414; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
415; IR-NEXT:    ret i32 [[TMP1]]
416;
417; GCN-LABEL: atomicrmw_umin_private_i32:
418; GCN:       ; %bb.0:
419; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
420; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
421; GCN-NEXT:    s_waitcnt vmcnt(0)
422; GCN-NEXT:    v_min_u32_e32 v2, 4, v1
423; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
424; GCN-NEXT:    v_mov_b32_e32 v0, v1
425; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
426; GCN-NEXT:    s_setpc_b64 s[30:31]
427  %result = atomicrmw umin ptr addrspace(5) %ptr, i32 4 seq_cst
428  ret i32 %result
429}
430
; seq_cst `atomicrmw fadd` of the constant 2.0 on an addrspace(5) float
; pointer. The atomic-expand checks expect plain load, `fadd float ..., 2.0`,
; plain store, returning the loaded (old) value. The llc checks expect a
; v_add_f32 between the load and the store.
431define float @atomicrmw_fadd_private_f32(ptr addrspace(5) %ptr) {
432; IR-LABEL: define float @atomicrmw_fadd_private_f32(
433; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
434; IR-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(5) [[PTR]], align 4
435; IR-NEXT:    [[NEW:%.*]] = fadd float [[TMP1]], 2.000000e+00
436; IR-NEXT:    store float [[NEW]], ptr addrspace(5) [[PTR]], align 4
437; IR-NEXT:    ret float [[TMP1]]
438;
439; GCN-LABEL: atomicrmw_fadd_private_f32:
440; GCN:       ; %bb.0:
441; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
443; GCN-NEXT:    s_waitcnt vmcnt(0)
444; GCN-NEXT:    v_add_f32_e32 v2, 2.0, v1
445; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
446; GCN-NEXT:    v_mov_b32_e32 v0, v1
447; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
448; GCN-NEXT:    s_setpc_b64 s[30:31]
449  %result = atomicrmw fadd ptr addrspace(5) %ptr, float 2.0 seq_cst
450  ret float %result
451}
452
; seq_cst `atomicrmw fadd` of the constant 2.0 on an addrspace(5) bfloat
; pointer. The atomic-expand checks expect plain load / fadd / store on
; bfloat with align 2 (the constant is printed as 0xR4000). The llc checks
; expect a 16-bit load, a left shift by 16, a v_add_f32, a right shift by 16
; and a 16-bit store; the returned value is the shifted loaded value.
453define bfloat @atomicrmw_fadd_private_bf16(ptr addrspace(5) %ptr) {
454; IR-LABEL: define bfloat @atomicrmw_fadd_private_bf16(
455; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
456; IR-NEXT:    [[TMP1:%.*]] = load bfloat, ptr addrspace(5) [[PTR]], align 2
457; IR-NEXT:    [[NEW:%.*]] = fadd bfloat [[TMP1]], 0xR4000
458; IR-NEXT:    store bfloat [[NEW]], ptr addrspace(5) [[PTR]], align 2
459; IR-NEXT:    ret bfloat [[TMP1]]
460;
461; GCN-LABEL: atomicrmw_fadd_private_bf16:
462; GCN:       ; %bb.0:
463; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
464; GCN-NEXT:    buffer_load_ushort v1, v0, s[0:3], 0 offen
465; GCN-NEXT:    s_waitcnt vmcnt(0)
466; GCN-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
467; GCN-NEXT:    v_add_f32_e32 v2, 2.0, v1
468; GCN-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
469; GCN-NEXT:    buffer_store_short v2, v0, s[0:3], 0 offen
470; GCN-NEXT:    v_mov_b32_e32 v0, v1
471; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
472; GCN-NEXT:    s_setpc_b64 s[30:31]
473  %result = atomicrmw fadd ptr addrspace(5) %ptr, bfloat 2.0 seq_cst
474  ret bfloat %result
475}
476
; seq_cst `atomicrmw fsub` of the argument %val on an addrspace(5) float
; pointer. The atomic-expand checks expect plain load, `fsub float`, plain
; store, returning the loaded (old) value. The llc checks expect a v_sub_f32
; between the load and the store.
; NOTE(review): the function name ends in _i32 although the operation is on
; float; left unchanged because the name appears in the check labels.
477define float @atomicrmw_fsub_private_i32(ptr addrspace(5) %ptr, float %val) {
478; IR-LABEL: define float @atomicrmw_fsub_private_i32(
479; IR-SAME: ptr addrspace(5) [[PTR:%.*]], float [[VAL:%.*]]) #[[ATTR0]] {
480; IR-NEXT:    [[TMP1:%.*]] = load float, ptr addrspace(5) [[PTR]], align 4
481; IR-NEXT:    [[NEW:%.*]] = fsub float [[TMP1]], [[VAL]]
482; IR-NEXT:    store float [[NEW]], ptr addrspace(5) [[PTR]], align 4
483; IR-NEXT:    ret float [[TMP1]]
484;
485; GCN-LABEL: atomicrmw_fsub_private_i32:
486; GCN:       ; %bb.0:
487; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488; GCN-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen
489; GCN-NEXT:    s_waitcnt vmcnt(0)
490; GCN-NEXT:    v_sub_f32_e32 v1, v2, v1
491; GCN-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
492; GCN-NEXT:    v_mov_b32_e32 v0, v2
493; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
494; GCN-NEXT:    s_setpc_b64 s[30:31]
495  %result = atomicrmw fsub ptr addrspace(5) %ptr, float %val seq_cst
496  ret float %result
497}
498
; Kernel that allocates a [2 x i32] array in addrspace(5), initialises it to
; {0, 1}, performs an acq_rel `atomicrmw add` of 7 on the element indexed by
; %in, and stores the returned old value to %out.
; The atomic-expand checks expect the atomicrmw to become plain load / add /
; store on the alloca element. The llc checks contain no buffer access to
; the alloca at all; the value written to %out is produced by comparing a
; loaded scalar (presumably %in) with 1 and selecting 1 or 0.
499define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(ptr addrspace(1) %out, i32 %in) nounwind {
500; IR-LABEL: define amdgpu_kernel void @alloca_promote_atomicrmw_private_lds_promote(
501; IR-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IN:%.*]]) #[[ATTR1:[0-9]+]] {
502; IR-NEXT:  entry:
503; IR-NEXT:    [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5)
504; IR-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 1
505; IR-NEXT:    store i32 0, ptr addrspace(5) [[TMP]], align 4
506; IR-NEXT:    store i32 1, ptr addrspace(5) [[GEP2]], align 4
507; IR-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 [[IN]]
508; IR-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[GEP3]], align 4
509; IR-NEXT:    [[NEW:%.*]] = add i32 [[TMP0]], 7
510; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[GEP3]], align 4
511; IR-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[OUT]], align 4
512; IR-NEXT:    ret void
513;
514; GCN-LABEL: alloca_promote_atomicrmw_private_lds_promote:
515; GCN:       ; %bb.0: ; %entry
516; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
517; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
518; GCN-NEXT:    s_mov_b32 s3, 0xf000
519; GCN-NEXT:    s_mov_b32 s2, -1
520; GCN-NEXT:    s_waitcnt lgkmcnt(0)
521; GCN-NEXT:    s_cmp_eq_u32 s6, 1
522; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
523; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
524; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
525; GCN-NEXT:    s_endpgm
526entry:
527  %tmp = alloca [2 x i32], addrspace(5)
528  %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1
529  store i32 0, ptr addrspace(5) %tmp
530  store i32 1, ptr addrspace(5) %gep2
531  %gep3 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 %in
532  %rmw = atomicrmw add ptr addrspace(5) %gep3, i32 7 acq_rel
533  store i32 %rmw, ptr addrspace(1) %out
534  ret void
535}
536
; Kernel that allocates a [2 x i32] array in addrspace(5), initialises it to
; {0, 1}, performs a cmpxchg (expected 0, replacement 1, acq_rel / monotonic)
; on the element indexed by %in, and stores the returned old value to %out.
; The atomic-expand checks expect the cmpxchg to become load / icmp eq /
; select / store on the alloca element. The llc checks contain no buffer
; access to the alloca at all; the value written to %out is produced by
; comparing a loaded scalar (presumably %in) with 1 and selecting 1 or 0.
537define amdgpu_kernel void @alloca_promote_cmpxchg_private(ptr addrspace(1) %out, i32 %in) nounwind {
538; IR-LABEL: define amdgpu_kernel void @alloca_promote_cmpxchg_private(
539; IR-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[IN:%.*]]) #[[ATTR1]] {
540; IR-NEXT:  entry:
541; IR-NEXT:    [[TMP:%.*]] = alloca [2 x i32], align 4, addrspace(5)
542; IR-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 1
543; IR-NEXT:    store i32 0, ptr addrspace(5) [[TMP]], align 4
544; IR-NEXT:    store i32 1, ptr addrspace(5) [[GEP2]], align 4
545; IR-NEXT:    [[GEP3:%.*]] = getelementptr inbounds [2 x i32], ptr addrspace(5) [[TMP]], i32 0, i32 [[IN]]
546; IR-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(5) [[GEP3]], align 4
547; IR-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
548; IR-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 1, i32 [[TMP0]]
549; IR-NEXT:    store i32 [[TMP2]], ptr addrspace(5) [[GEP3]], align 4
550; IR-NEXT:    [[TMP3:%.*]] = insertvalue { i32, i1 } poison, i32 [[TMP0]], 0
551; IR-NEXT:    [[TMP4:%.*]] = insertvalue { i32, i1 } [[TMP3]], i1 [[TMP1]], 1
552; IR-NEXT:    [[VAL:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0
553; IR-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
554; IR-NEXT:    ret void
555;
556; GCN-LABEL: alloca_promote_cmpxchg_private:
557; GCN:       ; %bb.0: ; %entry
558; GCN-NEXT:    s_load_dword s6, s[4:5], 0xb
559; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
560; GCN-NEXT:    s_mov_b32 s3, 0xf000
561; GCN-NEXT:    s_mov_b32 s2, -1
562; GCN-NEXT:    s_waitcnt lgkmcnt(0)
563; GCN-NEXT:    s_cmp_eq_u32 s6, 1
564; GCN-NEXT:    s_cselect_b64 s[4:5], -1, 0
565; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
566; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
567; GCN-NEXT:    s_endpgm
568entry:
569  %tmp = alloca [2 x i32], addrspace(5)
570  %gep2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1
571  store i32 0, ptr addrspace(5) %tmp
572  store i32 1, ptr addrspace(5) %gep2
573  %gep3 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 %in
574  %xchg = cmpxchg ptr addrspace(5) %gep3, i32 0, i32 1 acq_rel monotonic
575  %val = extractvalue { i32, i1 } %xchg, 0
576  store i32 %val, ptr addrspace(1) %out
577  ret void
578}
579
; seq_cst `atomicrmw uinc_wrap` with the constant 4 on an addrspace(5) i32
; pointer. The atomic-expand checks expect plain load, `add ..., 1`,
; `icmp uge ..., 4`, a select that stores 0 when the old value is >= 4 and
; old + 1 otherwise, then a plain store; the old value is returned. The llc
; checks expect the inverse compare (4 > old, unsigned) feeding a
; v_cndmask_b32 that picks between 0 and old + 1.
580define i32 @atomicrmw_inc_private_i32(ptr addrspace(5) %ptr) {
581; IR-LABEL: define i32 @atomicrmw_inc_private_i32(
582; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
583; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
584; IR-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
585; IR-NEXT:    [[TMP3:%.*]] = icmp uge i32 [[TMP1]], 4
586; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP3]], i32 0, i32 [[TMP2]]
587; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
588; IR-NEXT:    ret i32 [[TMP1]]
589;
590; GCN-LABEL: atomicrmw_inc_private_i32:
591; GCN:       ; %bb.0:
592; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
593; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
594; GCN-NEXT:    s_waitcnt vmcnt(0)
595; GCN-NEXT:    v_add_i32_e32 v2, vcc, 1, v1
596; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 4, v1
597; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
598; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
599; GCN-NEXT:    v_mov_b32_e32 v0, v1
600; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
601; GCN-NEXT:    s_setpc_b64 s[30:31]
602  %result = atomicrmw uinc_wrap ptr addrspace(5) %ptr, i32 4 seq_cst
603  ret i32 %result
604}
605
; seq_cst `atomicrmw udec_wrap` with the constant 4 on an addrspace(5) i32
; pointer. The atomic-expand checks expect plain load, `sub ..., 1`, the two
; compares `icmp eq ..., 0` and `icmp ugt ..., 4` combined with `or`, a
; select that stores 4 when either compare holds and old - 1 otherwise, then
; a plain store; the old value is returned. The llc checks expect the two
; compares to be combined with s_or_b64 and fed to a v_cndmask_b32.
606define i32 @atomicrmw_dec_private_i32(ptr addrspace(5) %ptr) {
607; IR-LABEL: define i32 @atomicrmw_dec_private_i32(
608; IR-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
609; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(5) [[PTR]], align 4
610; IR-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], 1
611; IR-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP1]], 0
612; IR-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP1]], 4
613; IR-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
614; IR-NEXT:    [[NEW:%.*]] = select i1 [[TMP5]], i32 4, i32 [[TMP2]]
615; IR-NEXT:    store i32 [[NEW]], ptr addrspace(5) [[PTR]], align 4
616; IR-NEXT:    ret i32 [[TMP1]]
617;
618; GCN-LABEL: atomicrmw_dec_private_i32:
619; GCN:       ; %bb.0:
620; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621; GCN-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen
622; GCN-NEXT:    s_waitcnt vmcnt(0)
623; GCN-NEXT:    v_add_i32_e32 v2, vcc, -1, v1
624; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
625; GCN-NEXT:    v_cmp_lt_u32_e64 s[4:5], 4, v1
626; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
627; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, 4, s[4:5]
628; GCN-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen
629; GCN-NEXT:    v_mov_b32_e32 v0, v1
630; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
631; GCN-NEXT:    s_setpc_b64 s[30:31]
632  %result = atomicrmw udec_wrap ptr addrspace(5) %ptr, i32 4 seq_cst
633  ret i32 %result
634}
635