; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX12

define float @raw_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], null th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

define void @raw_buffer_atomic_cond_sub_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], null th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  ret void
}

define void @raw_buffer_atomic_cond_sub_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_no_return_forced:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], null
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  ret void
}

define float @raw_buffer_atomic_cond_sub_imm_soff_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], s4 th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 4, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

define void @raw_buffer_atomic_cond_sub_imm_soff_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], s4 th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 4, i32 0)
  ret void
}

define void @raw_buffer_atomic_cond_sub_imm_soff_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
; GFX12-LABEL: raw_buffer_atomic_cond_sub_imm_soff_no_return_forced:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_mov_b32_e32 v0, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, off, s[0:3], s4
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 4, i32 0)
  ret void
}

define float @struct_buffer_atomic_cond_sub_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, v1, s[0:3], null idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

define void @struct_buffer_atomic_cond_sub_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v1, v0, s[0:3], null idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret void
}

define void @struct_buffer_atomic_cond_sub_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_no_return_forced:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s16
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v1, v0, s[0:3], null idxen
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret void
}

define float @struct_buffer_atomic_cond_sub_imm_soff_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v0, v1, s[0:3], s4 idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 4, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

define void @struct_buffer_atomic_cond_sub_imm_soff_no_return(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_no_return:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v1, v0, s[0:3], s4 idxen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 4, i32 0)
  ret void
}

define void @struct_buffer_atomic_cond_sub_imm_soff_no_return_forced(<4 x i32> inreg %rsrc, i32 inreg %data) #1 {
; GFX12-LABEL: struct_buffer_atomic_cond_sub_imm_soff_no_return_forced:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s16
; GFX12-NEXT:    s_mov_b32 s4, 4
; GFX12-NEXT:    buffer_atomic_cond_sub_u32 v1, v0, s[0:3], s4 idxen
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %unused = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 4, i32 0)
  ret void
}

declare i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind "target-features"="+atomic-csub-no-rtn-insts" }