; RUN: llc -mtriple=amdgcn -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SI,SICIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,SICIVI,GFX89 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -enable-var-scope -strict-whitespace -check-prefixes=GCN,GFX9,GFX89 %s

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xchg ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_f64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_f64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr double, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, double 4.0 seq_cst
  store double %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_ret_pointer_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_ret_pointer_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr ptr, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, ptr null seq_cst
  store ptr %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw and ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw and ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw or ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw or ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xor ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xor ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_ret_i64:
; define amdgpu_kernel void @lds_atomic_nand_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
;   %result = atomicrmw nand ptr addrspace(3) %ptr, i64 4 seq_cst
;   store i64 %result, ptr addrspace(1) %out, align 8
;   ret void
; }
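; A minimal sketch of what such an expansion could look like: a cmpxchg loop
; computing nand(x, 4) == ~(x & 4). This is illustrative only (names and block
; structure are assumptions, not what the backend is verified to emit today).
;
; define amdgpu_kernel void @lds_atomic_nand_expand_sketch_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
; entry:
;   %init = load i64, ptr addrspace(3) %ptr, align 8
;   br label %loop
; loop:
;   %old = phi i64 [ %init, %entry ], [ %prev, %loop ]
;   %and = and i64 %old, 4
;   %nand = xor i64 %and, -1
;   %pair = cmpxchg ptr addrspace(3) %ptr, i64 %old, i64 %nand seq_cst seq_cst
;   %prev = extractvalue { i64, i1 } %pair, 0
;   %ok = extractvalue { i64, i1 } %pair, 1
;   br i1 %ok, label %done, label %loop
; done:
;   store i64 %prev, ptr addrspace(1) %out, align 8
;   ret void
; }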

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw min ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw min ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw max ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw max ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umin ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umin ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umax ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umax ptr addrspace(3) %gep, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xchg ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xchg_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xchg_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xchg ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add_noret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x9
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x24
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_add_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 9 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_add_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw add ptr addrspace(3) %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_add1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_add_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_add1_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw add ptr addrspace(3) %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_sub_u64 {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw sub ptr addrspace(3) %ptr, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_sub1_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_sub_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_sub1_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw sub ptr addrspace(3) %gep, i64 1 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw and ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_and_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_and_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_and_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw and ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw or ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_or_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_or_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_or_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw or ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw xor ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_xor_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_xor_b64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_xor_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw xor ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; FIXME: There is no atomic nand instruction, so we somehow need to expand this.
; XGCN-LABEL: {{^}}lds_atomic_nand_noret_i64:
; define amdgpu_kernel void @lds_atomic_nand_noret_i64(ptr addrspace(3) %ptr) nounwind {
;   %result = atomicrmw nand ptr addrspace(3) %ptr, i64 4 seq_cst
;   ret void
; }

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw min ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_min_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_min_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw min ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw max ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_max_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_i64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_max_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw max ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umin ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umin_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_min_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umin_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umin ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64(ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw umax ptr addrspace(3) %ptr, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_umax_noret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_max_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_umax_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw umax ptr addrspace(3) %gep, i64 4 seq_cst
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_inc_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_inc_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_inc1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw uinc_wrap ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_inc1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_inc_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_inc1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw uinc_wrap ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_dec_rtn_u64
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 4 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec_ret_i64_offset:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; SI-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; GFX89-DAG: s_load_dword [[PTR:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0
; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[VPTR]], v[[[LOVDATA]]:[[HIVDATA]]] offset:32
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i64 4
  %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 9 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64:
; SICIVI-DAG: s_mov_b32 m0
; GFX9-NOT: m0

; GCN-DAG: v_mov_b32_e32 v[[LOVDATA:[0-9]+]], 1{{$}}
; GCN-DAG: v_mov_b32_e32 v[[HIVDATA:[0-9]+]], 0{{$}}
; GCN: ds_dec_rtn_u64 [[RESULT:v\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, v[[[LOVDATA]]:[[HIVDATA]]]
; GCN: buffer_store_dwordx2 [[RESULT]],
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_dec1_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %result = atomicrmw udec_wrap ptr addrspace(3) %ptr, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}

; GCN-LABEL: {{^}}lds_atomic_dec1_ret_i64_offset:
; SICIVI: s_mov_b32 m0
; GFX9-NOT: m0

; GCN: ds_dec_rtn_u64 {{.*}} offset:32
; GCN: s_endpgm
define amdgpu_kernel void @lds_atomic_dec1_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) nounwind {
  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
  %result = atomicrmw udec_wrap ptr addrspace(3) %gep, i64 1 seq_cst
  store i64 %result, ptr addrspace(1) %out, align 8
  ret void
}