xref: /llvm-project/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
4
; Declarations of the llvm.amdgcn.atomic.cond.sub.u32 intrinsic, one overload
; per pointer address space exercised in this file:
;   .p3 takes ptr addrspace(3) - called by the ds_* tests
;   .p1 takes ptr addrspace(1) - called by the global_* tests
;   .p0 takes a default-address-space ptr - called by the flat_* tests
; Each overload takes (pointer, i32) and returns i32.
5declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3), i32)
6declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1), i32)
7declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr, i32)
8
; Flat case, result unused, no extra target features on the function.
; Calls the .p0 overload and discards the returned i32. The checks for both
; SelectionDAG and GlobalISel still expect the returning form of
; flat_atomic_cond_sub_u32 (destination VGPR plus th:TH_ATOMIC_RETURN).
; Compare with the *_forced variant of this test, which differs only by its
; "target-features" attribute.
9define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
10; GFX12-SDAG-LABEL: flat_atomic_cond_sub_no_rtn_u32:
11; GFX12-SDAG:       ; %bb.0: ; %entry
12; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
13; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
14; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
15; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, s2
16; GFX12-SDAG-NEXT:    flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
17; GFX12-SDAG-NEXT:    s_endpgm
18;
19; GFX12-GISEL-LABEL: flat_atomic_cond_sub_no_rtn_u32:
20; GFX12-GISEL:       ; %bb.0: ; %entry
21; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
22; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
23; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, s0
24; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
25; GFX12-GISEL-NEXT:    flat_atomic_cond_sub_u32 v0, v[0:1], v2 offset:-16 th:TH_ATOMIC_RETURN
26; GFX12-GISEL-NEXT:    s_endpgm
27entry:
  ; Index -4 over i32 elements is a -16 byte displacement; the checks above
  ; expect it folded into the instruction as offset:-16.
28  %gep = getelementptr i32, ptr %addr, i32 -4
  ; The result is deliberately left unused (note the void return and no store).
29  %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
30  ret void
31}
32
; Flat case, result unused, with "target-features"="+atomic-csub-no-rtn-insts".
; The IR body is the same as flat_atomic_cond_sub_no_rtn_u32; only the function
; attribute differs. With the feature enabled, the checks for both selectors
; expect the non-returning form: no destination VGPR and no
; th:TH_ATOMIC_RETURN on flat_atomic_cond_sub_u32.
33define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32_forced(ptr %addr, i32 %in) "target-features"="+atomic-csub-no-rtn-insts" {
34; GFX12-SDAG-LABEL: flat_atomic_cond_sub_no_rtn_u32_forced:
35; GFX12-SDAG:       ; %bb.0: ; %entry
36; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
37; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
38; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
39; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, s2
40; GFX12-SDAG-NEXT:    flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
41; GFX12-SDAG-NEXT:    s_endpgm
42;
43; GFX12-GISEL-LABEL: flat_atomic_cond_sub_no_rtn_u32_forced:
44; GFX12-GISEL:       ; %bb.0: ; %entry
45; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
46; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
47; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, s0
48; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
49; GFX12-GISEL-NEXT:    flat_atomic_cond_sub_u32 v[0:1], v2 offset:-16
50; GFX12-GISEL-NEXT:    s_endpgm
51entry:
  ; Index -4 over i32 elements is a -16 byte displacement; the checks above
  ; expect it folded into the instruction as offset:-16.
52  %gep = getelementptr i32, ptr %addr, i32 -4
  ; The result is deliberately left unused so the non-returning form can apply.
53  %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
54  ret void
55}
56
; Flat case where the returned value is consumed.
; Calls the .p0 overload and stores the returned i32 through %use. The checks
; expect the returning form (th:TH_ATOMIC_RETURN), followed by
; s_wait_loadcnt_dscnt 0x0 before the flat_store_b32 that consumes the result.
57define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr %use) {
58; GFX12-SDAG-LABEL: flat_atomic_cond_sub_rtn_u32:
59; GFX12-SDAG:       ; %bb.0: ; %entry
60; GFX12-SDAG-NEXT:    s_clause 0x1
61; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
62; GFX12-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
63; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
64; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
65; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, s2
66; GFX12-SDAG-NEXT:    flat_atomic_cond_sub_u32 v2, v[0:1], v2 offset:16 th:TH_ATOMIC_RETURN
67; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
68; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
69; GFX12-SDAG-NEXT:    flat_store_b32 v[0:1], v2
70; GFX12-SDAG-NEXT:    s_endpgm
71;
72; GFX12-GISEL-LABEL: flat_atomic_cond_sub_rtn_u32:
73; GFX12-GISEL:       ; %bb.0: ; %entry
74; GFX12-GISEL-NEXT:    s_clause 0x1
75; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
76; GFX12-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
77; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
78; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, s0
79; GFX12-GISEL-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
80; GFX12-GISEL-NEXT:    flat_atomic_cond_sub_u32 v2, v[0:1], v2 offset:16 th:TH_ATOMIC_RETURN
81; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
82; GFX12-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
83; GFX12-GISEL-NEXT:    flat_store_b32 v[0:1], v2
84; GFX12-GISEL-NEXT:    s_endpgm
85entry:
  ; Index +4 over i32 elements is a +16 byte displacement; the checks above
  ; expect it folded into the instruction as offset:16.
86  %gep = getelementptr i32, ptr %addr, i32 4
87  %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
  ; Storing %val keeps the atomic's result live.
88  store i32 %val, ptr %use
89  ret void
90}
91
; addrspace(1) case, result unused, no extra target features on the function.
; Calls the .p1 overload and discards the returned i32. The checks for both
; selectors still expect the returning form of global_atomic_cond_sub_u32
; (destination VGPR plus th:TH_ATOMIC_RETURN), addressed through the SGPR pair
; s[0:1] with a zero VGPR.
92define amdgpu_kernel void @global_atomic_cond_sub_no_rtn_u32(ptr addrspace(1) %addr, i32 %in) {
93; GFX12-SDAG-LABEL: global_atomic_cond_sub_no_rtn_u32:
94; GFX12-SDAG:       ; %bb.0: ; %entry
95; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
96; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
97; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
98; GFX12-SDAG-NEXT:    global_atomic_cond_sub_u32 v0, v0, v1, s[0:1] offset:-16 th:TH_ATOMIC_RETURN
99; GFX12-SDAG-NEXT:    s_endpgm
100;
101; GFX12-GISEL-LABEL: global_atomic_cond_sub_no_rtn_u32:
102; GFX12-GISEL:       ; %bb.0: ; %entry
103; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
104; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
105; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
106; GFX12-GISEL-NEXT:    global_atomic_cond_sub_u32 v0, v1, v0, s[0:1] offset:-16 th:TH_ATOMIC_RETURN
107; GFX12-GISEL-NEXT:    s_endpgm
108entry:
  ; Index -4 over i32 elements is a -16 byte displacement; the checks above
  ; expect it folded into the instruction as offset:-16.
109  %gep = getelementptr i32, ptr addrspace(1) %addr, i32 -4
  ; The result is deliberately left unused (note the void return and no store).
110  %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1) %gep, i32 %in)
111  ret void
112}
113
; addrspace(1) case, result unused, with
; "target-features"="+atomic-csub-no-rtn-insts".
; The IR body is the same as global_atomic_cond_sub_no_rtn_u32; only the
; function attribute differs. With the feature enabled, the checks for both
; selectors expect the non-returning form: no destination VGPR and no
; th:TH_ATOMIC_RETURN on global_atomic_cond_sub_u32.
114define amdgpu_kernel void @global_atomic_cond_sub_no_rtn_u32_forced(ptr addrspace(1) %addr, i32 %in) "target-features"="+atomic-csub-no-rtn-insts" {
115; GFX12-SDAG-LABEL: global_atomic_cond_sub_no_rtn_u32_forced:
116; GFX12-SDAG:       ; %bb.0: ; %entry
117; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
118; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
119; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
120; GFX12-SDAG-NEXT:    global_atomic_cond_sub_u32 v0, v1, s[0:1] offset:-16
121; GFX12-SDAG-NEXT:    s_endpgm
122;
123; GFX12-GISEL-LABEL: global_atomic_cond_sub_no_rtn_u32_forced:
124; GFX12-GISEL:       ; %bb.0: ; %entry
125; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
126; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
127; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
128; GFX12-GISEL-NEXT:    global_atomic_cond_sub_u32 v1, v0, s[0:1] offset:-16
129; GFX12-GISEL-NEXT:    s_endpgm
130entry:
  ; Index -4 over i32 elements is a -16 byte displacement; the checks above
  ; expect it folded into the instruction as offset:-16.
131  %gep = getelementptr i32, ptr addrspace(1) %addr, i32 -4
  ; The result is deliberately left unused so the non-returning form can apply.
132  %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1) %gep, i32 %in)
133  ret void
134}
135
; addrspace(1) case where the returned value is consumed.
; Calls the .p1 overload and stores the returned i32 through %use. The checks
; expect the returning form (th:TH_ATOMIC_RETURN), followed by
; s_wait_loadcnt 0x0 before the global_store_b32 that consumes the result.
; The two selectors are checked separately because their expected schedules
; differ: the GlobalISel checks group both kernel-argument loads under
; s_clause 0x1, while the SelectionDAG checks place a v_mov between them.
136define amdgpu_kernel void @global_atomic_cond_sub_rtn_u32(ptr addrspace(1) %addr, i32 %in, ptr addrspace(1) %use) {
137; GFX12-SDAG-LABEL: global_atomic_cond_sub_rtn_u32:
138; GFX12-SDAG:       ; %bb.0: ; %entry
139; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
140; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0
141; GFX12-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
142; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
143; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s2
144; GFX12-SDAG-NEXT:    global_atomic_cond_sub_u32 v1, v0, v1, s[0:1] offset:16 th:TH_ATOMIC_RETURN
145; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
146; GFX12-SDAG-NEXT:    global_store_b32 v0, v1, s[4:5]
147; GFX12-SDAG-NEXT:    s_endpgm
148;
149; GFX12-GISEL-LABEL: global_atomic_cond_sub_rtn_u32:
150; GFX12-GISEL:       ; %bb.0: ; %entry
151; GFX12-GISEL-NEXT:    s_clause 0x1
152; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
153; GFX12-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
154; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
155; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
156; GFX12-GISEL-NEXT:    global_atomic_cond_sub_u32 v0, v1, v0, s[0:1] offset:16 th:TH_ATOMIC_RETURN
157; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
158; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[4:5]
159; GFX12-GISEL-NEXT:    s_endpgm
160entry:
  ; Index +4 over i32 elements is a +16 byte displacement; the checks above
  ; expect it folded into the instruction as offset:16.
161  %gep = getelementptr i32, ptr addrspace(1) %addr, i32 4
162  %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1) %gep, i32 %in)
  ; Storing %val keeps the atomic's result live.
163  store i32 %val, ptr addrspace(1) %use
164  ret void
165}
166
; addrspace(3) case, result unused, no extra target features on the function.
; Calls the .p3 overload and discards the returned i32. The checks for both
; selectors still expect the returning instruction ds_cond_sub_rtn_u32.
; Unlike the flat/global tests, the -16 byte displacement is not expected as
; an offset modifier here: the checks show it applied with a scalar add
; (s_add_co_i32 for SelectionDAG, s_add_co_u32 for GlobalISel) before the
; address is moved into a VGPR.
167define amdgpu_kernel void @ds_cond_sub_no_rtn_u32(ptr addrspace(3) %addr, i32 %in) {
168; GFX12-SDAG-LABEL: ds_cond_sub_no_rtn_u32:
169; GFX12-SDAG:       ; %bb.0: ; %entry
170; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
171; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
172; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, -16
173; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
174; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
175; GFX12-SDAG-NEXT:    ds_cond_sub_rtn_u32 v0, v0, v1
176; GFX12-SDAG-NEXT:    s_endpgm
177;
178; GFX12-GISEL-LABEL: ds_cond_sub_no_rtn_u32:
179; GFX12-GISEL:       ; %bb.0: ; %entry
180; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
181; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
182; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, -16
183; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
184; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
185; GFX12-GISEL-NEXT:    ds_cond_sub_rtn_u32 v0, v0, v1
186; GFX12-GISEL-NEXT:    s_endpgm
187entry:
  ; Index -4 over i32 elements is a -16 byte displacement.
188  %gep = getelementptr i32, ptr addrspace(3) %addr, i32 -4
  ; The result is deliberately left unused (note the void return and no store).
189  %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3) %gep, i32 %in)
190  ret void
191}
192
; addrspace(3) case, result unused, with
; "target-features"="+atomic-csub-no-rtn-insts".
; The IR body is the same as ds_cond_sub_no_rtn_u32; only the function
; attribute differs. With the feature enabled, the checks for both selectors
; expect the non-returning instruction ds_cond_sub_u32 (no destination VGPR)
; instead of ds_cond_sub_rtn_u32. The -16 byte displacement is still expected
; as a scalar add rather than an offset modifier.
193define amdgpu_kernel void @ds_cond_sub_no_rtn_u32_forced(ptr addrspace(3) %addr, i32 %in) "target-features"="+atomic-csub-no-rtn-insts" {
194; GFX12-SDAG-LABEL: ds_cond_sub_no_rtn_u32_forced:
195; GFX12-SDAG:       ; %bb.0: ; %entry
196; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
197; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
198; GFX12-SDAG-NEXT:    s_add_co_i32 s0, s0, -16
199; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
200; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
201; GFX12-SDAG-NEXT:    ds_cond_sub_u32 v0, v1
202; GFX12-SDAG-NEXT:    s_endpgm
203;
204; GFX12-GISEL-LABEL: ds_cond_sub_no_rtn_u32_forced:
205; GFX12-GISEL:       ; %bb.0: ; %entry
206; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
207; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
208; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, -16
209; GFX12-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
210; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
211; GFX12-GISEL-NEXT:    ds_cond_sub_u32 v0, v1
212; GFX12-GISEL-NEXT:    s_endpgm
213entry:
  ; Index -4 over i32 elements is a -16 byte displacement.
214  %gep = getelementptr i32, ptr addrspace(3) %addr, i32 -4
  ; The result is deliberately left unused so the non-returning form can apply.
215  %unused = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3) %gep, i32 %in)
216  ret void
217}
218
; addrspace(3) case where the returned value is consumed.
; Calls the .p3 overload and stores the returned i32 through %use. The checks
; expect ds_cond_sub_rtn_u32 with the +16 byte displacement folded in as
; offset:16, followed by s_wait_dscnt 0x0 before the ds_store_b32 that
; consumes the result.
219define amdgpu_kernel void @ds_cond_sub_rtn_u32(ptr addrspace(3) %addr, i32 %in, ptr addrspace(3) %use) {
220; GFX12-SDAG-LABEL: ds_cond_sub_rtn_u32:
221; GFX12-SDAG:       ; %bb.0: ; %entry
222; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
223; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
224; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
225; GFX12-SDAG-NEXT:    ds_cond_sub_rtn_u32 v0, v0, v1 offset:16
226; GFX12-SDAG-NEXT:    v_mov_b32_e32 v1, s2
227; GFX12-SDAG-NEXT:    s_wait_dscnt 0x0
228; GFX12-SDAG-NEXT:    ds_store_b32 v1, v0
229; GFX12-SDAG-NEXT:    s_endpgm
230;
231; GFX12-GISEL-LABEL: ds_cond_sub_rtn_u32:
232; GFX12-GISEL:       ; %bb.0: ; %entry
233; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
234; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
235; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
236; GFX12-GISEL-NEXT:    ds_cond_sub_rtn_u32 v0, v1, v0 offset:16
237; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, s2
238; GFX12-GISEL-NEXT:    s_wait_dscnt 0x0
239; GFX12-GISEL-NEXT:    ds_store_b32 v1, v0
240; GFX12-GISEL-NEXT:    s_endpgm
241entry:
  ; Index +4 over i32 elements is a +16 byte displacement.
242  %gep = getelementptr i32, ptr addrspace(3) %addr, i32 4
243  %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3) %gep, i32 %in)
  ; Storing %val keeps the atomic's result live.
244  store i32 %val, ptr addrspace(3) %use
245  ret void
246}
247