xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
4; Not supported in gfx8 or gfx9
5; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
7; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
8
; Baseline returning test. %val, %vindex and %voffset are ordinary arguments
; (v0, v1 and v2 in the checks); %rsrc and %soffset are marked inreg and the
; checks show them in SGPRs. Because %ret is returned, the expected atomic
; carries glc (th:TH_ATOMIC_RETURN in the GFX12 checks) and is followed by a
; wait before s_setpc_b64. The check lines are autogenerated (see the note on
; the first line of the file), so regenerate them instead of editing by hand.
9define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
10; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
11; GFX6:       ; %bb.0:
12; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX6-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc
14; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
15; GFX6-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
18; GFX7:       ; %bb.0:
19; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX7-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc
21; GFX7-NEXT:    s_waitcnt vmcnt(0)
22; GFX7-NEXT:    s_setpc_b64 s[30:31]
23;
24; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
25; GFX10:       ; %bb.0:
26; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX10-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc
28; GFX10-NEXT:    s_waitcnt vmcnt(0)
29; GFX10-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
32; GFX11:       ; %bb.0:
33; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX11-NEXT:    buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen glc
35; GFX11-NEXT:    s_waitcnt vmcnt(0)
36; GFX11-NEXT:    s_setpc_b64 s[30:31]
37;
38; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
39; GFX12:       ; %bb.0:
40; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
41; GFX12-NEXT:    s_wait_expcnt 0x0
42; GFX12-NEXT:    s_wait_samplecnt 0x0
43; GFX12-NEXT:    s_wait_bvhcnt 0x0
44; GFX12-NEXT:    s_wait_kmcnt 0x0
45; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen th:TH_ATOMIC_RETURN
46; GFX12-NEXT:    s_wait_loadcnt 0x0
47; GFX12-NEXT:    s_setpc_b64 s[30:31]
48  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
49  ret float %ret
50}
51
; Returning test with a constant added to the VGPR offset. The IR adds 256 to
; %voffset before the call; every target's checks expect that constant to
; appear as the instruction's immediate (offset:256) rather than as a separate
; add, with the vindex/voffset pair still passed in v[1:2].
52define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
53; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
54; GFX6:       ; %bb.0:
55; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56; GFX6-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256 glc
57; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
58; GFX6-NEXT:    s_setpc_b64 s[30:31]
59;
60; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
61; GFX7:       ; %bb.0:
62; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; GFX7-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256 glc
64; GFX7-NEXT:    s_waitcnt vmcnt(0)
65; GFX7-NEXT:    s_setpc_b64 s[30:31]
66;
67; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
68; GFX10:       ; %bb.0:
69; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70; GFX10-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256 glc
71; GFX10-NEXT:    s_waitcnt vmcnt(0)
72; GFX10-NEXT:    s_setpc_b64 s[30:31]
73;
74; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
75; GFX11:       ; %bb.0:
76; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX11-NEXT:    buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen offset:256 glc
78; GFX11-NEXT:    s_waitcnt vmcnt(0)
79; GFX11-NEXT:    s_setpc_b64 s[30:31]
80;
81; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
82; GFX12:       ; %bb.0:
83; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
84; GFX12-NEXT:    s_wait_expcnt 0x0
85; GFX12-NEXT:    s_wait_samplecnt 0x0
86; GFX12-NEXT:    s_wait_bvhcnt 0x0
87; GFX12-NEXT:    s_wait_kmcnt 0x0
88; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen offset:256 th:TH_ATOMIC_RETURN
89; GFX12-NEXT:    s_wait_loadcnt 0x0
90; GFX12-NEXT:    s_setpc_b64 s[30:31]
91  %voffset.add = add i32 %voffset, 256
92  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
93  ret float %ret
94}
95
; Returning test with no voffset argument: the call passes the constant 0 as
; the voffset operand. The checks expect only the index VGPR (v1) with idxen
; and no offen, instead of the v[1:2] pair used when a voffset is present.
96define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
97; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
98; GFX6:       ; %bb.0:
99; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; GFX6-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], s20 idxen glc
101; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
102; GFX6-NEXT:    s_setpc_b64 s[30:31]
103;
104; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
105; GFX7:       ; %bb.0:
106; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], s20 idxen glc
108; GFX7-NEXT:    s_waitcnt vmcnt(0)
109; GFX7-NEXT:    s_setpc_b64 s[30:31]
110;
111; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
112; GFX10:       ; %bb.0:
113; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], s20 idxen glc
115; GFX10-NEXT:    s_waitcnt vmcnt(0)
116; GFX10-NEXT:    s_setpc_b64 s[30:31]
117;
118; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
119; GFX11:       ; %bb.0:
120; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; GFX11-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], s16 idxen glc
122; GFX11-NEXT:    s_waitcnt vmcnt(0)
123; GFX11-NEXT:    s_setpc_b64 s[30:31]
124;
125; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
126; GFX12:       ; %bb.0:
127; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
128; GFX12-NEXT:    s_wait_expcnt 0x0
129; GFX12-NEXT:    s_wait_samplecnt 0x0
130; GFX12-NEXT:    s_wait_bvhcnt 0x0
131; GFX12-NEXT:    s_wait_kmcnt 0x0
132; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], s16 idxen th:TH_ATOMIC_RETURN
133; GFX12-NEXT:    s_wait_loadcnt 0x0
134; GFX12-NEXT:    s_setpc_b64 s[30:31]
135  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
136  ret float %ret
137}
138
; Returning test with the final immarg operand set to 2 instead of 0. With
; that value the GFX6, GFX7, GFX10 and GFX11 checks expect an extra slc
; modifier next to glc, and the GFX12 checks expect th:TH_ATOMIC_NT_RETURN in
; place of th:TH_ATOMIC_RETURN. Everything else matches the baseline
; returning test.
139define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
140; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
141; GFX6:       ; %bb.0:
142; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; GFX6-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc slc
144; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
145; GFX6-NEXT:    s_setpc_b64 s[30:31]
146;
147; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
148; GFX7:       ; %bb.0:
149; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150; GFX7-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc slc
151; GFX7-NEXT:    s_waitcnt vmcnt(0)
152; GFX7-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
155; GFX10:       ; %bb.0:
156; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX10-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen glc slc
158; GFX10-NEXT:    s_waitcnt vmcnt(0)
159; GFX10-NEXT:    s_setpc_b64 s[30:31]
160;
161; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
162; GFX11:       ; %bb.0:
163; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164; GFX11-NEXT:    buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen glc slc
165; GFX11-NEXT:    s_waitcnt vmcnt(0)
166; GFX11-NEXT:    s_setpc_b64 s[30:31]
167;
168; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
169; GFX12:       ; %bb.0:
170; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
171; GFX12-NEXT:    s_wait_expcnt 0x0
172; GFX12-NEXT:    s_wait_samplecnt 0x0
173; GFX12-NEXT:    s_wait_bvhcnt 0x0
174; GFX12-NEXT:    s_wait_kmcnt 0x0
175; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen th:TH_ATOMIC_NT_RETURN
176; GFX12-NEXT:    s_wait_loadcnt 0x0
177; GFX12-NEXT:    s_setpc_b64 s[30:31]
178  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
179  ret float %ret
180}
181
; Baseline no-return test: same operands as the baseline returning test, but
; the call result %ret is unused and the function returns void. The expected
; atomic therefore has no glc (and no th: modifier in the GFX12 checks). The
; GFX6 and GFX7 checks still contain an s_waitcnt before s_setpc_b64, while
; the GFX10, GFX11 and GFX12 checks return directly after the atomic.
182define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
183; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
184; GFX6:       ; %bb.0:
185; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186; GFX6-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen
187; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
188; GFX6-NEXT:    s_setpc_b64 s[30:31]
189;
190; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
191; GFX7:       ; %bb.0:
192; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193; GFX7-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen
194; GFX7-NEXT:    s_waitcnt vmcnt(0)
195; GFX7-NEXT:    s_setpc_b64 s[30:31]
196;
197; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
198; GFX10:       ; %bb.0:
199; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200; GFX10-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen
201; GFX10-NEXT:    s_setpc_b64 s[30:31]
202;
203; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
204; GFX11:       ; %bb.0:
205; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX11-NEXT:    buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen
207; GFX11-NEXT:    s_setpc_b64 s[30:31]
208;
209; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
210; GFX12:       ; %bb.0:
211; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
212; GFX12-NEXT:    s_wait_expcnt 0x0
213; GFX12-NEXT:    s_wait_samplecnt 0x0
214; GFX12-NEXT:    s_wait_bvhcnt 0x0
215; GFX12-NEXT:    s_wait_kmcnt 0x0
216; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen
217; GFX12-NEXT:    s_setpc_b64 s[30:31]
218  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
219  ret void
220}
221
; No-return test with a constant added to the VGPR offset. The IR adds 256 to
; %voffset and discards the call result; the checks expect the constant as the
; immediate offset:256 on the atomic and no glc / th: modifier.
222define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
223; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
224; GFX6:       ; %bb.0:
225; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226; GFX6-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256
227; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
228; GFX6-NEXT:    s_setpc_b64 s[30:31]
229;
230; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
231; GFX7:       ; %bb.0:
232; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233; GFX7-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256
234; GFX7-NEXT:    s_waitcnt vmcnt(0)
235; GFX7-NEXT:    s_setpc_b64 s[30:31]
236;
237; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
238; GFX10:       ; %bb.0:
239; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240; GFX10-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen offset:256
241; GFX10-NEXT:    s_setpc_b64 s[30:31]
242;
243; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
244; GFX11:       ; %bb.0:
245; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246; GFX11-NEXT:    buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen offset:256
247; GFX11-NEXT:    s_setpc_b64 s[30:31]
248;
249; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
250; GFX12:       ; %bb.0:
251; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
252; GFX12-NEXT:    s_wait_expcnt 0x0
253; GFX12-NEXT:    s_wait_samplecnt 0x0
254; GFX12-NEXT:    s_wait_bvhcnt 0x0
255; GFX12-NEXT:    s_wait_kmcnt 0x0
256; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen offset:256
257; GFX12-NEXT:    s_setpc_b64 s[30:31]
258  %voffset.add = add i32 %voffset, 256
259  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
260  ret void
261}
262
263; Natural mapping, no voffset
; No-return test with the constant 0 as the voffset operand and the call
; result unused. The checks expect a single index VGPR (v1) with idxen only,
; and no glc / th: modifier on the atomic.
264define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
265; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
266; GFX6:       ; %bb.0:
267; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268; GFX6-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], s20 idxen
269; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
270; GFX6-NEXT:    s_setpc_b64 s[30:31]
271;
272; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
273; GFX7:       ; %bb.0:
274; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], s20 idxen
276; GFX7-NEXT:    s_waitcnt vmcnt(0)
277; GFX7-NEXT:    s_setpc_b64 s[30:31]
278;
279; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
280; GFX10:       ; %bb.0:
281; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], s20 idxen
283; GFX10-NEXT:    s_setpc_b64 s[30:31]
284;
285; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
286; GFX11:       ; %bb.0:
287; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288; GFX11-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], s16 idxen
289; GFX11-NEXT:    s_setpc_b64 s[30:31]
290;
291; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
292; GFX12:       ; %bb.0:
293; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
294; GFX12-NEXT:    s_wait_expcnt 0x0
295; GFX12-NEXT:    s_wait_samplecnt 0x0
296; GFX12-NEXT:    s_wait_bvhcnt 0x0
297; GFX12-NEXT:    s_wait_kmcnt 0x0
298; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], s16 idxen
299; GFX12-NEXT:    s_setpc_b64 s[30:31]
300  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
301  ret void
302}
303
; No-return test with the final immarg operand set to 2 and the call result
; unused. The GFX6, GFX7, GFX10 and GFX11 checks expect slc without glc, and
; the GFX12 checks expect th:TH_ATOMIC_NT.
304define void @struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
305; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
306; GFX6:       ; %bb.0:
307; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX6-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen slc
309; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
310; GFX6-NEXT:    s_setpc_b64 s[30:31]
311;
312; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
313; GFX7:       ; %bb.0:
314; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315; GFX7-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen slc
316; GFX7-NEXT:    s_waitcnt vmcnt(0)
317; GFX7-NEXT:    s_setpc_b64 s[30:31]
318;
319; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
320; GFX10:       ; %bb.0:
321; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX10-NEXT:    buffer_atomic_fmin v0, v[1:2], s[16:19], s20 idxen offen slc
323; GFX10-NEXT:    s_setpc_b64 s[30:31]
324;
325; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
326; GFX11:       ; %bb.0:
327; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; GFX11-NEXT:    buffer_atomic_min_f32 v0, v[1:2], s[0:3], s16 idxen offen slc
329; GFX11-NEXT:    s_setpc_b64 s[30:31]
330;
331; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
332; GFX12:       ; %bb.0:
333; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
334; GFX12-NEXT:    s_wait_expcnt 0x0
335; GFX12-NEXT:    s_wait_samplecnt 0x0
336; GFX12-NEXT:    s_wait_bvhcnt 0x0
337; GFX12-NEXT:    s_wait_kmcnt 0x0
338; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v[1:2], s[0:3], s16 idxen offen th:TH_ATOMIC_NT
339; GFX12-NEXT:    s_setpc_b64 s[30:31]
340  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
341  ret void
342}
343
344; Test waterfall loop on resource
; Returning test where %rsrc is NOT marked inreg, so the descriptor arrives in
; v[1:4] while %soffset (still inreg) stays in an SGPR. The checks expect a
; loop (.LBB8_1) that:
;   - copies v1..v4 into an SGPR quad with v_readfirstlane_b32,
;   - compares that quad against v[1:2] / v[3:4] and narrows exec to the
;     matching lanes with s_and_saveexec,
;   - issues the atomic (with the folded offset:256) using the SGPR quad,
;   - removes the handled lanes from exec (s_xor) and branches back while any
;     remain (s_cbranch_execnz),
; then restores the saved exec mask before waiting and returning.
345define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
346; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
347; GFX6:       ; %bb.0:
348; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GFX6-NEXT:    s_mov_b64 s[6:7], exec
350; GFX6-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
351; GFX6-NEXT:    v_readfirstlane_b32 s8, v1
352; GFX6-NEXT:    v_readfirstlane_b32 s9, v2
353; GFX6-NEXT:    v_readfirstlane_b32 s10, v3
354; GFX6-NEXT:    v_readfirstlane_b32 s11, v4
355; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
356; GFX6-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
357; GFX6-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
358; GFX6-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
359; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
360; GFX6-NEXT:    buffer_atomic_fmin v0, v[5:6], s[8:11], s16 idxen offen offset:256 glc
361; GFX6-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
362; GFX6-NEXT:    ; implicit-def: $vgpr5_vgpr6
363; GFX6-NEXT:    s_xor_b64 exec, exec, s[4:5]
364; GFX6-NEXT:    s_cbranch_execnz .LBB8_1
365; GFX6-NEXT:  ; %bb.2:
366; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
367; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
368; GFX6-NEXT:    s_setpc_b64 s[30:31]
369;
370; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
371; GFX7:       ; %bb.0:
372; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373; GFX7-NEXT:    s_mov_b64 s[6:7], exec
374; GFX7-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
375; GFX7-NEXT:    v_readfirstlane_b32 s8, v1
376; GFX7-NEXT:    v_readfirstlane_b32 s9, v2
377; GFX7-NEXT:    v_readfirstlane_b32 s10, v3
378; GFX7-NEXT:    v_readfirstlane_b32 s11, v4
379; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
380; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
381; GFX7-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
382; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
383; GFX7-NEXT:    s_waitcnt vmcnt(0)
384; GFX7-NEXT:    buffer_atomic_fmin v0, v[5:6], s[8:11], s16 idxen offen offset:256 glc
385; GFX7-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
386; GFX7-NEXT:    ; implicit-def: $vgpr5_vgpr6
387; GFX7-NEXT:    s_xor_b64 exec, exec, s[4:5]
388; GFX7-NEXT:    s_cbranch_execnz .LBB8_1
389; GFX7-NEXT:  ; %bb.2:
390; GFX7-NEXT:    s_mov_b64 exec, s[6:7]
391; GFX7-NEXT:    s_waitcnt vmcnt(0)
392; GFX7-NEXT:    s_setpc_b64 s[30:31]
393;
394; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
395; GFX10:       ; %bb.0:
396; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397; GFX10-NEXT:    s_mov_b32 s5, exec_lo
398; GFX10-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
399; GFX10-NEXT:    v_readfirstlane_b32 s8, v1
400; GFX10-NEXT:    v_readfirstlane_b32 s9, v2
401; GFX10-NEXT:    v_readfirstlane_b32 s10, v3
402; GFX10-NEXT:    v_readfirstlane_b32 s11, v4
403; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2]
404; GFX10-NEXT:    v_cmp_eq_u64_e64 s4, s[10:11], v[3:4]
405; GFX10-NEXT:    s_and_b32 s4, vcc_lo, s4
406; GFX10-NEXT:    s_and_saveexec_b32 s4, s4
407; GFX10-NEXT:    s_waitcnt vmcnt(0)
408; GFX10-NEXT:    buffer_atomic_fmin v0, v[5:6], s[8:11], s16 idxen offen offset:256 glc
409; GFX10-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
410; GFX10-NEXT:    ; implicit-def: $vgpr5_vgpr6
411; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
412; GFX10-NEXT:    s_xor_b32 exec_lo, exec_lo, s4
413; GFX10-NEXT:    s_cbranch_execnz .LBB8_1
414; GFX10-NEXT:  ; %bb.2:
415; GFX10-NEXT:    s_mov_b32 exec_lo, s5
416; GFX10-NEXT:    s_waitcnt vmcnt(0)
417; GFX10-NEXT:    s_setpc_b64 s[30:31]
418;
419; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
420; GFX11:       ; %bb.0:
421; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; GFX11-NEXT:    s_mov_b32 s2, exec_lo
423; GFX11-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
424; GFX11-NEXT:    v_readfirstlane_b32 s4, v1
425; GFX11-NEXT:    v_readfirstlane_b32 s5, v2
426; GFX11-NEXT:    v_readfirstlane_b32 s6, v3
427; GFX11-NEXT:    v_readfirstlane_b32 s7, v4
428; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
429; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
430; GFX11-NEXT:    v_cmp_eq_u64_e64 s1, s[6:7], v[3:4]
431; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
432; GFX11-NEXT:    s_and_b32 s1, vcc_lo, s1
433; GFX11-NEXT:    s_and_saveexec_b32 s1, s1
434; GFX11-NEXT:    s_waitcnt vmcnt(0)
435; GFX11-NEXT:    buffer_atomic_min_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 glc
436; GFX11-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
437; GFX11-NEXT:    ; implicit-def: $vgpr5_vgpr6
438; GFX11-NEXT:    s_xor_b32 exec_lo, exec_lo, s1
439; GFX11-NEXT:    s_cbranch_execnz .LBB8_1
440; GFX11-NEXT:  ; %bb.2:
441; GFX11-NEXT:    s_mov_b32 exec_lo, s2
442; GFX11-NEXT:    s_waitcnt vmcnt(0)
443; GFX11-NEXT:    s_setpc_b64 s[30:31]
444;
445; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset:
446; GFX12:       ; %bb.0:
447; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
448; GFX12-NEXT:    s_wait_expcnt 0x0
449; GFX12-NEXT:    s_wait_samplecnt 0x0
450; GFX12-NEXT:    s_wait_bvhcnt 0x0
451; GFX12-NEXT:    s_wait_kmcnt 0x0
452; GFX12-NEXT:    s_mov_b32 s2, exec_lo
453; GFX12-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
454; GFX12-NEXT:    v_readfirstlane_b32 s4, v1
455; GFX12-NEXT:    v_readfirstlane_b32 s5, v2
456; GFX12-NEXT:    v_readfirstlane_b32 s6, v3
457; GFX12-NEXT:    v_readfirstlane_b32 s7, v4
458; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
459; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
460; GFX12-NEXT:    v_cmp_eq_u64_e64 s1, s[6:7], v[3:4]
461; GFX12-NEXT:    s_wait_alu 0xfffe
462; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
463; GFX12-NEXT:    s_and_b32 s1, vcc_lo, s1
464; GFX12-NEXT:    s_wait_alu 0xfffe
465; GFX12-NEXT:    s_and_saveexec_b32 s1, s1
466; GFX12-NEXT:    s_wait_loadcnt 0x0
467; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 th:TH_ATOMIC_RETURN
468; GFX12-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
469; GFX12-NEXT:    ; implicit-def: $vgpr5_vgpr6
470; GFX12-NEXT:    s_wait_alu 0xfffe
471; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s1
472; GFX12-NEXT:    s_cbranch_execnz .LBB8_1
473; GFX12-NEXT:  ; %bb.2:
474; GFX12-NEXT:    s_mov_b32 exec_lo, s2
475; GFX12-NEXT:    s_wait_loadcnt 0x0
476; GFX12-NEXT:    s_wait_alu 0xfffe
477; GFX12-NEXT:    s_setpc_b64 s[30:31]
478  %voffset.add = add i32 %voffset, 256
479  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
480  ret float %ret
481}
482
483; Test waterfall loop on resource and soffset (neither %rsrc nor %soffset is inreg)
; Returning test where neither %rsrc nor %soffset is marked inreg.
; NOTE(review): the function name says "sgpr_rsrc", but the signature has no
; inreg on %rsrc, and the checks read the descriptor from v1..v4 as well as the
; soffset from v7. This test therefore loops over both operands, not soffset
; alone. Renaming it would change every LABEL check, so the name is left as is.
; The expected loop (.LBB9_1) readfirstlanes v1..v4 and v7 into SGPRs, ANDs
; the three equality compares together, narrows exec with s_and_saveexec,
; issues the atomic with the SGPR copies (offset:256 folded in), then drops
; the handled lanes from exec and repeats while any lanes remain.
484define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
485; GFX6-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
486; GFX6:       ; %bb.0:
487; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488; GFX6-NEXT:    s_mov_b64 s[6:7], exec
489; GFX6-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
490; GFX6-NEXT:    v_readfirstlane_b32 s8, v1
491; GFX6-NEXT:    v_readfirstlane_b32 s9, v2
492; GFX6-NEXT:    v_readfirstlane_b32 s10, v3
493; GFX6-NEXT:    v_readfirstlane_b32 s11, v4
494; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
495; GFX6-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
496; GFX6-NEXT:    v_readfirstlane_b32 s12, v7
497; GFX6-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
498; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s12, v7
499; GFX6-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
500; GFX6-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
501; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
502; GFX6-NEXT:    buffer_atomic_fmin v0, v[5:6], s[8:11], s12 idxen offen offset:256 glc
503; GFX6-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
504; GFX6-NEXT:    ; implicit-def: $vgpr7
505; GFX6-NEXT:    ; implicit-def: $vgpr5_vgpr6
506; GFX6-NEXT:    s_xor_b64 exec, exec, s[4:5]
507; GFX6-NEXT:    s_cbranch_execnz .LBB9_1
508; GFX6-NEXT:  ; %bb.2:
509; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
510; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
511; GFX6-NEXT:    s_setpc_b64 s[30:31]
512;
513; GFX7-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
514; GFX7:       ; %bb.0:
515; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516; GFX7-NEXT:    s_mov_b64 s[6:7], exec
517; GFX7-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
518; GFX7-NEXT:    v_readfirstlane_b32 s8, v1
519; GFX7-NEXT:    v_readfirstlane_b32 s9, v2
520; GFX7-NEXT:    v_readfirstlane_b32 s10, v3
521; GFX7-NEXT:    v_readfirstlane_b32 s11, v4
522; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
523; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[10:11], v[3:4]
524; GFX7-NEXT:    v_readfirstlane_b32 s12, v7
525; GFX7-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
526; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, s12, v7
527; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
528; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
529; GFX7-NEXT:    s_waitcnt vmcnt(0)
530; GFX7-NEXT:    buffer_atomic_fmin v0, v[5:6], s[8:11], s12 idxen offen offset:256 glc
531; GFX7-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
532; GFX7-NEXT:    ; implicit-def: $vgpr7
533; GFX7-NEXT:    ; implicit-def: $vgpr5_vgpr6
534; GFX7-NEXT:    s_xor_b64 exec, exec, s[4:5]
535; GFX7-NEXT:    s_cbranch_execnz .LBB9_1
536; GFX7-NEXT:  ; %bb.2:
537; GFX7-NEXT:    s_mov_b64 exec, s[6:7]
538; GFX7-NEXT:    s_waitcnt vmcnt(0)
539; GFX7-NEXT:    s_setpc_b64 s[30:31]
540;
541; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
542; GFX10:       ; %bb.0:
543; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544; GFX10-NEXT:    s_mov_b32 s6, exec_lo
545; GFX10-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
546; GFX10-NEXT:    v_readfirstlane_b32 s8, v1
547; GFX10-NEXT:    v_readfirstlane_b32 s9, v2
548; GFX10-NEXT:    v_readfirstlane_b32 s10, v3
549; GFX10-NEXT:    v_readfirstlane_b32 s11, v4
550; GFX10-NEXT:    v_readfirstlane_b32 s7, v7
551; GFX10-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2]
552; GFX10-NEXT:    v_cmp_eq_u64_e64 s4, s[10:11], v[3:4]
553; GFX10-NEXT:    v_cmp_eq_u32_e64 s5, s7, v7
554; GFX10-NEXT:    s_and_b32 s4, vcc_lo, s4
555; GFX10-NEXT:    s_and_b32 s4, s4, s5
556; GFX10-NEXT:    s_and_saveexec_b32 s4, s4
557; GFX10-NEXT:    s_waitcnt vmcnt(0)
558; GFX10-NEXT:    buffer_atomic_fmin v0, v[5:6], s[8:11], s7 idxen offen offset:256 glc
559; GFX10-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
560; GFX10-NEXT:    ; implicit-def: $vgpr7
561; GFX10-NEXT:    ; implicit-def: $vgpr5_vgpr6
562; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
563; GFX10-NEXT:    s_xor_b32 exec_lo, exec_lo, s4
564; GFX10-NEXT:    s_cbranch_execnz .LBB9_1
565; GFX10-NEXT:  ; %bb.2:
566; GFX10-NEXT:    s_mov_b32 exec_lo, s6
567; GFX10-NEXT:    s_waitcnt vmcnt(0)
568; GFX10-NEXT:    s_setpc_b64 s[30:31]
569;
570; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
571; GFX11:       ; %bb.0:
572; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
573; GFX11-NEXT:    s_mov_b32 s2, exec_lo
574; GFX11-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
575; GFX11-NEXT:    v_readfirstlane_b32 s4, v1
576; GFX11-NEXT:    v_readfirstlane_b32 s5, v2
577; GFX11-NEXT:    v_readfirstlane_b32 s6, v3
578; GFX11-NEXT:    v_readfirstlane_b32 s7, v4
579; GFX11-NEXT:    v_readfirstlane_b32 s3, v7
580; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
581; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
582; GFX11-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[3:4]
583; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
584; GFX11-NEXT:    v_cmp_eq_u32_e64 s1, s3, v7
585; GFX11-NEXT:    s_and_b32 s0, vcc_lo, s0
586; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
587; GFX11-NEXT:    s_and_b32 s0, s0, s1
588; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
589; GFX11-NEXT:    s_and_saveexec_b32 s0, s0
590; GFX11-NEXT:    s_waitcnt vmcnt(0)
591; GFX11-NEXT:    buffer_atomic_min_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 glc
592; GFX11-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
593; GFX11-NEXT:    ; implicit-def: $vgpr7
594; GFX11-NEXT:    ; implicit-def: $vgpr5_vgpr6
595; GFX11-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
596; GFX11-NEXT:    s_cbranch_execnz .LBB9_1
597; GFX11-NEXT:  ; %bb.2:
598; GFX11-NEXT:    s_mov_b32 exec_lo, s2
599; GFX11-NEXT:    s_waitcnt vmcnt(0)
600; GFX11-NEXT:    s_setpc_b64 s[30:31]
601;
602; GFX12-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset:
603; GFX12:       ; %bb.0:
604; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
605; GFX12-NEXT:    s_wait_expcnt 0x0
606; GFX12-NEXT:    s_wait_samplecnt 0x0
607; GFX12-NEXT:    s_wait_bvhcnt 0x0
608; GFX12-NEXT:    s_wait_kmcnt 0x0
609; GFX12-NEXT:    s_mov_b32 s2, exec_lo
610; GFX12-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
611; GFX12-NEXT:    v_readfirstlane_b32 s4, v1
612; GFX12-NEXT:    v_readfirstlane_b32 s5, v2
613; GFX12-NEXT:    v_readfirstlane_b32 s6, v3
614; GFX12-NEXT:    v_readfirstlane_b32 s7, v4
615; GFX12-NEXT:    v_readfirstlane_b32 s3, v7
616; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
617; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2]
618; GFX12-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[3:4]
619; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
620; GFX12-NEXT:    v_cmp_eq_u32_e64 s1, s3, v7
621; GFX12-NEXT:    s_wait_alu 0xfffe
622; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
623; GFX12-NEXT:    s_wait_alu 0xfffe
624; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
625; GFX12-NEXT:    s_and_b32 s0, s0, s1
626; GFX12-NEXT:    s_wait_alu 0xfffe
627; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
628; GFX12-NEXT:    s_wait_loadcnt 0x0
629; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 th:TH_ATOMIC_RETURN
630; GFX12-NEXT:    ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4
631; GFX12-NEXT:    ; implicit-def: $vgpr7
632; GFX12-NEXT:    ; implicit-def: $vgpr5_vgpr6
633; GFX12-NEXT:    s_wait_alu 0xfffe
634; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
635; GFX12-NEXT:    s_cbranch_execnz .LBB9_1
636; GFX12-NEXT:  ; %bb.2:
637; GFX12-NEXT:    s_mov_b32 exec_lo, s2
638; GFX12-NEXT:    s_wait_loadcnt 0x0
639; GFX12-NEXT:    s_wait_alu 0xfffe
640; GFX12-NEXT:    s_setpc_b64 s[30:31]
641  %voffset.add = add i32 %voffset, 256
642  %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
643  ret float %ret
644}
645
; Intrinsic under test. As used by the calls in this file, the operands are,
; in order: the float value, the ptr addrspace(8) buffer resource, vindex,
; voffset, soffset, and a final i32 immarg. The tests pass 0 for that immarg,
; or 2 in the *_slc tests, whose checks expect slc / th:TH_ATOMIC_NT*.
646declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
647