xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_fmin.ll (revision eeac0ffaf46cf9f9b0f680b9940cc4b68a0286d8)
1611212fcSMatt Arsenault; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s
3611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
4611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
5611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
7611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
8611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tonga < %s | FileCheck -check-prefix=GFX8 %s
9611212fcSMatt Arsenault; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
10611212fcSMatt Arsenault
11611212fcSMatt Arsenault; TODO: Delete this and add run lines to use *-atomicrmw-fmin.ll tests
12611212fcSMatt Arsenault
; Returning f32 case on an addrspace(3) pointer: a seq_cst `atomicrmw fmin`
; whose result value is returned to the caller.
; Every checked target expects one returning DS min instruction
; (ds_min_num_rtn_f32 for the gfx1200 run, ds_min_rtn_f32 for the others)
; followed by a wait on the DS/LGKM counter. The tonga and hawaii runs also
; expect m0 to be written with -1 before the DS instruction, and the gfx1200,
; gfx1100 and gfx1010 runs expect a cache invalidate (global_inv or
; buffer_gl0_inv) after the wait.
13611212fcSMatt Arsenaultdefine float @local_atomic_fmin_ret_f32(ptr addrspace(3) %ptr, float %val) {
14611212fcSMatt Arsenault; GFX12-LABEL: local_atomic_fmin_ret_f32:
15611212fcSMatt Arsenault; GFX12:       ; %bb.0:
16611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
17611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
18611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
19611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
20611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
21611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
22611212fcSMatt Arsenault; GFX12-NEXT:    ds_min_num_rtn_f32 v0, v0, v1
23611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_dscnt 0x0
24611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_SE
25611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
26611212fcSMatt Arsenault;
27611212fcSMatt Arsenault; GFX940-LABEL: local_atomic_fmin_ret_f32:
28611212fcSMatt Arsenault; GFX940:       ; %bb.0:
29611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30611212fcSMatt Arsenault; GFX940-NEXT:    ds_min_rtn_f32 v0, v0, v1
31611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
32611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
33611212fcSMatt Arsenault;
34611212fcSMatt Arsenault; GFX11-LABEL: local_atomic_fmin_ret_f32:
35611212fcSMatt Arsenault; GFX11:       ; %bb.0:
36611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
38611212fcSMatt Arsenault; GFX11-NEXT:    ds_min_rtn_f32 v0, v0, v1
39611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
40611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
41611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
42611212fcSMatt Arsenault;
43611212fcSMatt Arsenault; GFX10-LABEL: local_atomic_fmin_ret_f32:
44611212fcSMatt Arsenault; GFX10:       ; %bb.0:
45611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
47611212fcSMatt Arsenault; GFX10-NEXT:    ds_min_rtn_f32 v0, v0, v1
48611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
49611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
50611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
51611212fcSMatt Arsenault;
52611212fcSMatt Arsenault; GFX90A-LABEL: local_atomic_fmin_ret_f32:
53611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
54611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55611212fcSMatt Arsenault; GFX90A-NEXT:    ds_min_rtn_f32 v0, v0, v1
56611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
57611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
58611212fcSMatt Arsenault;
59611212fcSMatt Arsenault; GFX908-LABEL: local_atomic_fmin_ret_f32:
60611212fcSMatt Arsenault; GFX908:       ; %bb.0:
61611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62611212fcSMatt Arsenault; GFX908-NEXT:    ds_min_rtn_f32 v0, v0, v1
63611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
64611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
65611212fcSMatt Arsenault;
66611212fcSMatt Arsenault; GFX8-LABEL: local_atomic_fmin_ret_f32:
67611212fcSMatt Arsenault; GFX8:       ; %bb.0:
68611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b32 m0, -1
70611212fcSMatt Arsenault; GFX8-NEXT:    ds_min_rtn_f32 v0, v0, v1
71611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
72611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
73611212fcSMatt Arsenault;
74611212fcSMatt Arsenault; GFX7-LABEL: local_atomic_fmin_ret_f32:
75611212fcSMatt Arsenault; GFX7:       ; %bb.0:
76611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 m0, -1
78611212fcSMatt Arsenault; GFX7-NEXT:    ds_min_rtn_f32 v0, v0, v1
79611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
80611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
81611212fcSMatt Arsenault  %result = atomicrmw fmin ptr addrspace(3) %ptr, float %val seq_cst
82611212fcSMatt Arsenault  ret float %result
83611212fcSMatt Arsenault}
84611212fcSMatt Arsenault
; Non-returning f32 case on an addrspace(3) pointer: the seq_cst
; `atomicrmw fmin` result (%unused) is never read and the function returns void.
; Every checked target expects the non-returning DS min form
; (ds_min_num_f32 for the gfx1200 run, ds_min_f32 for the others) followed by
; a wait on the DS/LGKM counter. As in the returning variant, the tonga and
; hawaii runs expect m0 to be written with -1 first, and the gfx1200, gfx1100
; and gfx1010 runs expect a cache invalidate after the wait.
85611212fcSMatt Arsenaultdefine void @local_atomic_fmin_noret_f32(ptr addrspace(3) %ptr, float %val) {
86611212fcSMatt Arsenault; GFX12-LABEL: local_atomic_fmin_noret_f32:
87611212fcSMatt Arsenault; GFX12:       ; %bb.0:
88611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
89611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
90611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
91611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
92611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
93611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
94611212fcSMatt Arsenault; GFX12-NEXT:    ds_min_num_f32 v0, v1
95611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_dscnt 0x0
96611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_SE
97611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
98611212fcSMatt Arsenault;
99611212fcSMatt Arsenault; GFX940-LABEL: local_atomic_fmin_noret_f32:
100611212fcSMatt Arsenault; GFX940:       ; %bb.0:
101611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102611212fcSMatt Arsenault; GFX940-NEXT:    ds_min_f32 v0, v1
103611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
104611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
105611212fcSMatt Arsenault;
106611212fcSMatt Arsenault; GFX11-LABEL: local_atomic_fmin_noret_f32:
107611212fcSMatt Arsenault; GFX11:       ; %bb.0:
108611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
110611212fcSMatt Arsenault; GFX11-NEXT:    ds_min_f32 v0, v1
111611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
112611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
113611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
114611212fcSMatt Arsenault;
115611212fcSMatt Arsenault; GFX10-LABEL: local_atomic_fmin_noret_f32:
116611212fcSMatt Arsenault; GFX10:       ; %bb.0:
117611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
119611212fcSMatt Arsenault; GFX10-NEXT:    ds_min_f32 v0, v1
120611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
121611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
122611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
123611212fcSMatt Arsenault;
124611212fcSMatt Arsenault; GFX90A-LABEL: local_atomic_fmin_noret_f32:
125611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
126611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127611212fcSMatt Arsenault; GFX90A-NEXT:    ds_min_f32 v0, v1
128611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
129611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
130611212fcSMatt Arsenault;
131611212fcSMatt Arsenault; GFX908-LABEL: local_atomic_fmin_noret_f32:
132611212fcSMatt Arsenault; GFX908:       ; %bb.0:
133611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134611212fcSMatt Arsenault; GFX908-NEXT:    ds_min_f32 v0, v1
135611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
136611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
137611212fcSMatt Arsenault;
138611212fcSMatt Arsenault; GFX8-LABEL: local_atomic_fmin_noret_f32:
139611212fcSMatt Arsenault; GFX8:       ; %bb.0:
140611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b32 m0, -1
142611212fcSMatt Arsenault; GFX8-NEXT:    ds_min_f32 v0, v1
143611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
144611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
145611212fcSMatt Arsenault;
146611212fcSMatt Arsenault; GFX7-LABEL: local_atomic_fmin_noret_f32:
147611212fcSMatt Arsenault; GFX7:       ; %bb.0:
148611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 m0, -1
150611212fcSMatt Arsenault; GFX7-NEXT:    ds_min_f32 v0, v1
151611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
152611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
153611212fcSMatt Arsenault  %unused = atomicrmw fmin ptr addrspace(3) %ptr, double %val seq_cst
154611212fcSMatt Arsenault  ret void
155611212fcSMatt Arsenault}
156611212fcSMatt Arsenault
; Returning f64 case on an addrspace(3) pointer: a seq_cst `atomicrmw fmin`
; on a double whose result value is returned to the caller.
; Every checked target expects one returning 64-bit DS min instruction
; (ds_min_num_rtn_f64 for the gfx1200 run, ds_min_rtn_f64 for the others)
; writing v[0:1]. The gfx940 and gfx90a runs expect v1/v2 to be copied into
; v4/v5 first and v[4:5] to be used as the data operand; the other runs use
; v[1:2] directly.
; NOTE(review): the extra copies are presumably needed because those two
; targets want an even-aligned 64-bit VGPR pair - confirm.
157611212fcSMatt Arsenaultdefine double @local_atomic_fmin_ret_f64(ptr addrspace(3) %ptr, double %val) {
158611212fcSMatt Arsenault; GFX12-LABEL: local_atomic_fmin_ret_f64:
159611212fcSMatt Arsenault; GFX12:       ; %bb.0:
160611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
161611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
162611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
163611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
164611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
165611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
166611212fcSMatt Arsenault; GFX12-NEXT:    ds_min_num_rtn_f64 v[0:1], v0, v[1:2]
167611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_dscnt 0x0
168611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_SE
169611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
170611212fcSMatt Arsenault;
171611212fcSMatt Arsenault; GFX940-LABEL: local_atomic_fmin_ret_f64:
172611212fcSMatt Arsenault; GFX940:       ; %bb.0:
173611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v4, v1
175611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v5, v2
176611212fcSMatt Arsenault; GFX940-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[4:5]
177611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
178611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
179611212fcSMatt Arsenault;
180611212fcSMatt Arsenault; GFX11-LABEL: local_atomic_fmin_ret_f64:
181611212fcSMatt Arsenault; GFX11:       ; %bb.0:
182611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
184611212fcSMatt Arsenault; GFX11-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[1:2]
185611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
186611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
187611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
188611212fcSMatt Arsenault;
189611212fcSMatt Arsenault; GFX10-LABEL: local_atomic_fmin_ret_f64:
190611212fcSMatt Arsenault; GFX10:       ; %bb.0:
191611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
193611212fcSMatt Arsenault; GFX10-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[1:2]
194611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
195611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
196611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
197611212fcSMatt Arsenault;
198611212fcSMatt Arsenault; GFX90A-LABEL: local_atomic_fmin_ret_f64:
199611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
200611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
202611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
203611212fcSMatt Arsenault; GFX90A-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[4:5]
204611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
205611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
206611212fcSMatt Arsenault;
207611212fcSMatt Arsenault; GFX908-LABEL: local_atomic_fmin_ret_f64:
208611212fcSMatt Arsenault; GFX908:       ; %bb.0:
209611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210611212fcSMatt Arsenault; GFX908-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[1:2]
211611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
212611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
213611212fcSMatt Arsenault;
214611212fcSMatt Arsenault; GFX8-LABEL: local_atomic_fmin_ret_f64:
215611212fcSMatt Arsenault; GFX8:       ; %bb.0:
216611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b32 m0, -1
218611212fcSMatt Arsenault; GFX8-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[1:2]
219611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
220611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
221611212fcSMatt Arsenault;
222611212fcSMatt Arsenault; GFX7-LABEL: local_atomic_fmin_ret_f64:
223611212fcSMatt Arsenault; GFX7:       ; %bb.0:
224611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 m0, -1
226611212fcSMatt Arsenault; GFX7-NEXT:    ds_min_rtn_f64 v[0:1], v0, v[1:2]
227611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
228611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
229611212fcSMatt Arsenault  %result = atomicrmw fmin ptr addrspace(3) %ptr, double %val seq_cst
230611212fcSMatt Arsenault  ret double %result
231611212fcSMatt Arsenault}
232611212fcSMatt Arsenault
; Non-returning f64 case on an addrspace(3) pointer: the seq_cst
; `atomicrmw fmin` result (%unused) is never read and the function returns void.
; Every checked target expects the non-returning 64-bit DS min form
; (ds_min_num_f64 for the gfx1200 run, ds_min_f64 for the others). The gfx940
; and gfx90a runs expect v1/v2 to be copied into v4/v5 first and v[4:5] to be
; used as the data operand; the other runs use v[1:2] directly.
; NOTE(review): the extra copies are presumably needed because those two
; targets want an even-aligned 64-bit VGPR pair - confirm.
233611212fcSMatt Arsenaultdefine void @local_atomic_fmin_noret_f64(ptr addrspace(3) %ptr, double %val) {
234611212fcSMatt Arsenault; GFX12-LABEL: local_atomic_fmin_noret_f64:
235611212fcSMatt Arsenault; GFX12:       ; %bb.0:
236611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
237611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
238611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
239611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
240611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
241611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
242611212fcSMatt Arsenault; GFX12-NEXT:    ds_min_num_f64 v0, v[1:2]
243611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_dscnt 0x0
244611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_SE
245611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
246611212fcSMatt Arsenault;
247611212fcSMatt Arsenault; GFX940-LABEL: local_atomic_fmin_noret_f64:
248611212fcSMatt Arsenault; GFX940:       ; %bb.0:
249611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v4, v1
251611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v5, v2
252611212fcSMatt Arsenault; GFX940-NEXT:    ds_min_f64 v0, v[4:5]
253611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
254611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
255611212fcSMatt Arsenault;
256611212fcSMatt Arsenault; GFX11-LABEL: local_atomic_fmin_noret_f64:
257611212fcSMatt Arsenault; GFX11:       ; %bb.0:
258611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
260611212fcSMatt Arsenault; GFX11-NEXT:    ds_min_f64 v0, v[1:2]
261611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
262611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
263611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
264611212fcSMatt Arsenault;
265611212fcSMatt Arsenault; GFX10-LABEL: local_atomic_fmin_noret_f64:
266611212fcSMatt Arsenault; GFX10:       ; %bb.0:
267611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
269611212fcSMatt Arsenault; GFX10-NEXT:    ds_min_f64 v0, v[1:2]
270611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
271611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
272611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
273611212fcSMatt Arsenault;
274611212fcSMatt Arsenault; GFX90A-LABEL: local_atomic_fmin_noret_f64:
275611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
276611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
278611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
279611212fcSMatt Arsenault; GFX90A-NEXT:    ds_min_f64 v0, v[4:5]
280611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
281611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
282611212fcSMatt Arsenault;
283611212fcSMatt Arsenault; GFX908-LABEL: local_atomic_fmin_noret_f64:
284611212fcSMatt Arsenault; GFX908:       ; %bb.0:
285611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286611212fcSMatt Arsenault; GFX908-NEXT:    ds_min_f64 v0, v[1:2]
287611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt lgkmcnt(0)
288611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
289611212fcSMatt Arsenault;
290611212fcSMatt Arsenault; GFX8-LABEL: local_atomic_fmin_noret_f64:
291611212fcSMatt Arsenault; GFX8:       ; %bb.0:
292611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b32 m0, -1
294611212fcSMatt Arsenault; GFX8-NEXT:    ds_min_f64 v0, v[1:2]
295611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
296611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
297611212fcSMatt Arsenault;
298611212fcSMatt Arsenault; GFX7-LABEL: local_atomic_fmin_noret_f64:
299611212fcSMatt Arsenault; GFX7:       ; %bb.0:
300611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 m0, -1
302611212fcSMatt Arsenault; GFX7-NEXT:    ds_min_f64 v0, v[1:2]
303611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
304611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
305611212fcSMatt Arsenault  %unused = atomicrmw fmin ptr addrspace(3) %ptr, double %val seq_cst
306611212fcSMatt Arsenault  ret void
307611212fcSMatt Arsenault}
308611212fcSMatt Arsenault
; Returning f32 case on an addrspace(1) pointer: a seq_cst `atomicrmw fmin`
; at syncscope("agent"), tagged with !amdgpu.no.fine.grained.memory (the !0
; node it refers to is defined outside this function), whose result value is
; returned to the caller.
; Four runs expect a single returning hardware atomic:
;   gfx1200 - global_atomic_min_num_f32 with th:TH_ATOMIC_RETURN
;   gfx1100 - global_atomic_min_f32 with glc
;   gfx1010 - global_atomic_fmin with glc
;   hawaii  - buffer_atomic_fmin with addr64 and glc
; The gfx940, gfx90a, gfx908 and tonga runs instead expect a loop between
; %atomicrmw.start and %atomicrmw.end: the current memory value is loaded
; once up front, then each iteration takes v_min_f32 of it and the input
; operand, attempts a cmpswap, and repeats until the value returned by the
; cmpswap compares equal to the value that was expected.
; NOTE(review): the v_max_f32 x, x (v_mul_f32 1.0, x on tonga) instructions
; ahead of v_min_f32 look like input canonicalization - confirm.
309611212fcSMatt Arsenaultdefine float @global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %val) {
310611212fcSMatt Arsenault; GFX12-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
311611212fcSMatt Arsenault; GFX12:       ; %bb.0:
312611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
313611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
314611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
315611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
316611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
317611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
31841439d5bSMatt Arsenault; GFX12-NEXT:    global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
319611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
320611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
321611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
322611212fcSMatt Arsenault;
323611212fcSMatt Arsenault; GFX940-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
324611212fcSMatt Arsenault; GFX940:       ; %bb.0:
325611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326611212fcSMatt Arsenault; GFX940-NEXT:    global_load_dword v3, v[0:1], off
327611212fcSMatt Arsenault; GFX940-NEXT:    s_mov_b64 s[0:1], 0
328*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
329611212fcSMatt Arsenault; GFX940-NEXT:  .LBB4_1: ; %atomicrmw.start
330611212fcSMatt Arsenault; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
331611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
332611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v5, v3
333*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
334*eeac0ffaSNikita Popov; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
335611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
336611212fcSMatt Arsenault; GFX940-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off sc0
337611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
338611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
339611212fcSMatt Arsenault; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
340611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
341611212fcSMatt Arsenault; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
342611212fcSMatt Arsenault; GFX940-NEXT:    s_cbranch_execnz .LBB4_1
343611212fcSMatt Arsenault; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
344611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
345611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v0, v3
346611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
347611212fcSMatt Arsenault;
348611212fcSMatt Arsenault; GFX11-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
349611212fcSMatt Arsenault; GFX11:       ; %bb.0:
350611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
35241439d5bSMatt Arsenault; GFX11-NEXT:    global_atomic_min_f32 v0, v[0:1], v2, off glc
353611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
354611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
355611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
356611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
357611212fcSMatt Arsenault;
358611212fcSMatt Arsenault; GFX10-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
359611212fcSMatt Arsenault; GFX10:       ; %bb.0:
360611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
36241439d5bSMatt Arsenault; GFX10-NEXT:    global_atomic_fmin v0, v[0:1], v2, off glc
363611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0)
364611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
365611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
366611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
367611212fcSMatt Arsenault;
368611212fcSMatt Arsenault; GFX90A-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
369611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
370611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371611212fcSMatt Arsenault; GFX90A-NEXT:    global_load_dword v3, v[0:1], off
372611212fcSMatt Arsenault; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
373*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v2
374611212fcSMatt Arsenault; GFX90A-NEXT:  .LBB4_1: ; %atomicrmw.start
375611212fcSMatt Arsenault; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
376611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
377611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
378*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v3, v5, v5
379*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_min_f32_e32 v4, v3, v2
380611212fcSMatt Arsenault; GFX90A-NEXT:    global_atomic_cmpswap v3, v[0:1], v[4:5], off glc
381611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
382611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
383611212fcSMatt Arsenault; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
384611212fcSMatt Arsenault; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
385611212fcSMatt Arsenault; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
386611212fcSMatt Arsenault; GFX90A-NEXT:    s_cbranch_execnz .LBB4_1
387611212fcSMatt Arsenault; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
388611212fcSMatt Arsenault; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
389611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
390611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
391611212fcSMatt Arsenault;
392611212fcSMatt Arsenault; GFX908-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
393611212fcSMatt Arsenault; GFX908:       ; %bb.0:
394611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395611212fcSMatt Arsenault; GFX908-NEXT:    global_load_dword v3, v[0:1], off
396611212fcSMatt Arsenault; GFX908-NEXT:    s_mov_b64 s[4:5], 0
397*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v2, v2, v2
398611212fcSMatt Arsenault; GFX908-NEXT:  .LBB4_1: ; %atomicrmw.start
399611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
400611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
401611212fcSMatt Arsenault; GFX908-NEXT:    v_mov_b32_e32 v4, v3
402*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v3, v4, v4
403*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f32_e32 v3, v3, v2
404611212fcSMatt Arsenault; GFX908-NEXT:    global_atomic_cmpswap v3, v[0:1], v[3:4], off glc
405611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
406611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
407611212fcSMatt Arsenault; GFX908-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
408611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
409611212fcSMatt Arsenault; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
410611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB4_1
411611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
412611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
413611212fcSMatt Arsenault; GFX908-NEXT:    v_mov_b32_e32 v0, v3
414611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
415611212fcSMatt Arsenault;
416611212fcSMatt Arsenault; GFX8-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
417611212fcSMatt Arsenault; GFX8:       ; %bb.0:
418611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419611212fcSMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[0:1]
420611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
421611212fcSMatt Arsenault; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v2
422611212fcSMatt Arsenault; GFX8-NEXT:  .LBB4_1: ; %atomicrmw.start
423611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
424611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
425611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, v3
426611212fcSMatt Arsenault; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v4
427611212fcSMatt Arsenault; GFX8-NEXT:    v_min_f32_e32 v3, v3, v2
428611212fcSMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
429611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
430611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
431611212fcSMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
432611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
433611212fcSMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
434611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB4_1
435611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
436611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
437611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, v3
438611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
439611212fcSMatt Arsenault;
440611212fcSMatt Arsenault; GFX7-LABEL: global_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
441611212fcSMatt Arsenault; GFX7:       ; %bb.0:
442611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 s6, 0
444611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 s7, 0xf000
445611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
44641439d5bSMatt Arsenault; GFX7-NEXT:    buffer_atomic_fmin v2, v[0:1], s[4:7], 0 addr64 glc
447611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0)
448611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
44941439d5bSMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, v2
450611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
451611212fcSMatt Arsenault  %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
452611212fcSMatt Arsenault  ret float %result
453611212fcSMatt Arsenault}
454611212fcSMatt Arsenault
455611212fcSMatt Arsenaultdefine void @global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, float %val) {
456611212fcSMatt Arsenault; GFX12-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
457611212fcSMatt Arsenault; GFX12:       ; %bb.0:
458611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
459611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
460611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
461611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
462611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
463611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
46441439d5bSMatt Arsenault; GFX12-NEXT:    global_atomic_min_num_f32 v[0:1], v2, off scope:SCOPE_DEV
46541439d5bSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
466611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
467611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
468611212fcSMatt Arsenault;
469611212fcSMatt Arsenault; GFX940-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
470611212fcSMatt Arsenault; GFX940:       ; %bb.0:
471611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472*eeac0ffaSNikita Popov; GFX940-NEXT:    global_load_dword v3, v[0:1], off
473611212fcSMatt Arsenault; GFX940-NEXT:    s_mov_b64 s[0:1], 0
474*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
475611212fcSMatt Arsenault; GFX940-NEXT:  .LBB5_1: ; %atomicrmw.start
476611212fcSMatt Arsenault; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
477611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
478*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
479*eeac0ffaSNikita Popov; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
480611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
481*eeac0ffaSNikita Popov; GFX940-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off sc0
482611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
483611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
484*eeac0ffaSNikita Popov; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
485611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
486*eeac0ffaSNikita Popov; GFX940-NEXT:    v_mov_b32_e32 v3, v2
487611212fcSMatt Arsenault; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
488611212fcSMatt Arsenault; GFX940-NEXT:    s_cbranch_execnz .LBB5_1
489611212fcSMatt Arsenault; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
490611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
491611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
492611212fcSMatt Arsenault;
493611212fcSMatt Arsenault; GFX11-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
494611212fcSMatt Arsenault; GFX11:       ; %bb.0:
495611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
49741439d5bSMatt Arsenault; GFX11-NEXT:    global_atomic_min_f32 v[0:1], v2, off
49841439d5bSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
499611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
500611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
501611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
502611212fcSMatt Arsenault;
503611212fcSMatt Arsenault; GFX10-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
504611212fcSMatt Arsenault; GFX10:       ; %bb.0:
505611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
506611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
50741439d5bSMatt Arsenault; GFX10-NEXT:    global_atomic_fmin v[0:1], v2, off
50841439d5bSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
509611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
510611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
511611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
512611212fcSMatt Arsenault;
513611212fcSMatt Arsenault; GFX90A-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
514611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
515611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
516*eeac0ffaSNikita Popov; GFX90A-NEXT:    global_load_dword v3, v[0:1], off
517611212fcSMatt Arsenault; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
518*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v4, v2, v2
519611212fcSMatt Arsenault; GFX90A-NEXT:  .LBB5_1: ; %atomicrmw.start
520611212fcSMatt Arsenault; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
521611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
522*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v2, v3, v3
523*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v4
524*eeac0ffaSNikita Popov; GFX90A-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
525611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
526611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
527*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
528611212fcSMatt Arsenault; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
529*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
530611212fcSMatt Arsenault; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
531611212fcSMatt Arsenault; GFX90A-NEXT:    s_cbranch_execnz .LBB5_1
532611212fcSMatt Arsenault; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
533611212fcSMatt Arsenault; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
534611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
535611212fcSMatt Arsenault;
536611212fcSMatt Arsenault; GFX908-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
537611212fcSMatt Arsenault; GFX908:       ; %bb.0:
538611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539*eeac0ffaSNikita Popov; GFX908-NEXT:    global_load_dword v3, v[0:1], off
540611212fcSMatt Arsenault; GFX908-NEXT:    s_mov_b64 s[4:5], 0
541*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v4, v2, v2
542611212fcSMatt Arsenault; GFX908-NEXT:  .LBB5_1: ; %atomicrmw.start
543611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
544611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
545*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v2, v3, v3
546*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f32_e32 v2, v2, v4
547*eeac0ffaSNikita Popov; GFX908-NEXT:    global_atomic_cmpswap v2, v[0:1], v[2:3], off glc
548611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
549611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
550*eeac0ffaSNikita Popov; GFX908-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
551611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
552*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v3, v2
553611212fcSMatt Arsenault; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
554611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB5_1
555611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
556611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
557611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
558611212fcSMatt Arsenault;
559611212fcSMatt Arsenault; GFX8-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
560611212fcSMatt Arsenault; GFX8:       ; %bb.0:
561611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562611212fcSMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[0:1]
563611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
564611212fcSMatt Arsenault; GFX8-NEXT:    v_mul_f32_e32 v4, 1.0, v2
565611212fcSMatt Arsenault; GFX8-NEXT:  .LBB5_1: ; %atomicrmw.start
566611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
567611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
568611212fcSMatt Arsenault; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v3
569611212fcSMatt Arsenault; GFX8-NEXT:    v_min_f32_e32 v2, v2, v4
570611212fcSMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
571611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
572611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
573611212fcSMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
574611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
575611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v3, v2
576611212fcSMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
577611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB5_1
578611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
579611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
580611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
581611212fcSMatt Arsenault;
582611212fcSMatt Arsenault; GFX7-LABEL: global_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
583611212fcSMatt Arsenault; GFX7:       ; %bb.0:
584611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 s6, 0
586611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 s7, 0xf000
587611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
58841439d5bSMatt Arsenault; GFX7-NEXT:    buffer_atomic_fmin v2, v[0:1], s[4:7], 0 addr64
589611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0)
590611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
591611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
592611212fcSMatt Arsenault  %unused = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
593611212fcSMatt Arsenault  ret void
594611212fcSMatt Arsenault}
595611212fcSMatt Arsenault
; Value-returning f64 `atomicrmw fmin` on a global (addrspace(1)) pointer with
; syncscope("agent") and seq_cst ordering, tagged with
; !amdgpu.no.fine.grained.memory. The instruction's result is returned to the
; caller.
;
; According to the checks below, the GFX940, GFX10, GFX90A and GFX7 prefixes
; expect a single native f64 min atomic (global_atomic_min_f64,
; global_atomic_fmin_x2 or buffer_atomic_fmin_x2) in its returning form, while
; the GFX12, GFX11, GFX908 and GFX8 prefixes expect a compare-and-swap retry
; loop (%atomicrmw.start / %atomicrmw.end) built from a v_min on the loaded
; value followed by a 64-bit cmpswap.
; NOTE(review): the v_max of a value with itself ahead of the v_min is
; presumably input canonicalization -- confirm against the AMDGPU backend.
;
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
596611212fcSMatt Arsenaultdefine double @global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %val) {
597611212fcSMatt Arsenault; GFX12-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
598611212fcSMatt Arsenault; GFX12:       ; %bb.0:
599611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
600611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
601611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
602611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
603611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
604611212fcSMatt Arsenault; GFX12-NEXT:    global_load_b64 v[4:5], v[0:1], off
605*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
606611212fcSMatt Arsenault; GFX12-NEXT:    s_mov_b32 s0, 0
607611212fcSMatt Arsenault; GFX12-NEXT:  .LBB6_1: ; %atomicrmw.start
608611212fcSMatt Arsenault; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
609611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
610611212fcSMatt Arsenault; GFX12-NEXT:    v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v6, v4
611*eeac0ffaSNikita Popov; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
612*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[6:7], v[6:7]
613*eeac0ffaSNikita Popov; GFX12-NEXT:    v_min_num_f64_e32 v[4:5], v[4:5], v[2:3]
614611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
615b3a44665SPierre van Houtryve; GFX12-NEXT:    global_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
616611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
617611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
618611212fcSMatt Arsenault; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7]
61986627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
620611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 s0, vcc_lo, s0
62186627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
622611212fcSMatt Arsenault; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
623611212fcSMatt Arsenault; GFX12-NEXT:    s_cbranch_execnz .LBB6_1
624611212fcSMatt Arsenault; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
625611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
626611212fcSMatt Arsenault; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
62786627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
628611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
629611212fcSMatt Arsenault;
630611212fcSMatt Arsenault; GFX940-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
631611212fcSMatt Arsenault; GFX940:       ; %bb.0:
632611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
63441439d5bSMatt Arsenault; GFX940-NEXT:    global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off sc0
635611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
636611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
637611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
638611212fcSMatt Arsenault;
639611212fcSMatt Arsenault; GFX11-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
640611212fcSMatt Arsenault; GFX11:       ; %bb.0:
641611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642611212fcSMatt Arsenault; GFX11-NEXT:    global_load_b64 v[4:5], v[0:1], off
643*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
644611212fcSMatt Arsenault; GFX11-NEXT:    s_mov_b32 s0, 0
645611212fcSMatt Arsenault; GFX11-NEXT:  .LBB6_1: ; %atomicrmw.start
646611212fcSMatt Arsenault; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
647611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
648611212fcSMatt Arsenault; GFX11-NEXT:    v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v6, v4
649*eeac0ffaSNikita Popov; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
650*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[4:5], v[6:7], v[6:7]
651*eeac0ffaSNikita Popov; GFX11-NEXT:    v_min_f64 v[4:5], v[4:5], v[2:3]
652611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
653611212fcSMatt Arsenault; GFX11-NEXT:    global_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7], off glc
654611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
655611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
656611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
657611212fcSMatt Arsenault; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7]
658611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 s0, vcc_lo, s0
659611212fcSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
660611212fcSMatt Arsenault; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
661611212fcSMatt Arsenault; GFX11-NEXT:    s_cbranch_execnz .LBB6_1
662611212fcSMatt Arsenault; GFX11-NEXT:  ; %bb.2: ; %atomicrmw.end
663611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
664611212fcSMatt Arsenault; GFX11-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
665611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
666611212fcSMatt Arsenault;
667611212fcSMatt Arsenault; GFX10-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
668611212fcSMatt Arsenault; GFX10:       ; %bb.0:
669611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
67141439d5bSMatt Arsenault; GFX10-NEXT:    global_atomic_fmin_x2 v[0:1], v[0:1], v[2:3], off glc
672611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0)
673611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
674611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
675611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
676611212fcSMatt Arsenault;
677611212fcSMatt Arsenault; GFX90A-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
678611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
679611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68041439d5bSMatt Arsenault; GFX90A-NEXT:    global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc
681611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
682611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
683611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
684611212fcSMatt Arsenault;
685611212fcSMatt Arsenault; GFX908-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
686611212fcSMatt Arsenault; GFX908:       ; %bb.0:
687611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688611212fcSMatt Arsenault; GFX908-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
689*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
690611212fcSMatt Arsenault; GFX908-NEXT:    s_mov_b64 s[4:5], 0
691611212fcSMatt Arsenault; GFX908-NEXT:  .LBB6_1: ; %atomicrmw.start
692611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
693611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
694611212fcSMatt Arsenault; GFX908-NEXT:    v_mov_b32_e32 v7, v5
695611212fcSMatt Arsenault; GFX908-NEXT:    v_mov_b32_e32 v6, v4
696*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[4:5], v[6:7], v[6:7]
697*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f64 v[4:5], v[4:5], v[2:3]
698611212fcSMatt Arsenault; GFX908-NEXT:    global_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7], off glc
699611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
700611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
701611212fcSMatt Arsenault; GFX908-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
702611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
703611212fcSMatt Arsenault; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
704611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB6_1
705611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
706611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
707611212fcSMatt Arsenault; GFX908-NEXT:    v_mov_b32_e32 v0, v4
708611212fcSMatt Arsenault; GFX908-NEXT:    v_mov_b32_e32 v1, v5
709611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
710611212fcSMatt Arsenault;
711611212fcSMatt Arsenault; GFX8-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
712611212fcSMatt Arsenault; GFX8:       ; %bb.0:
713611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
714611212fcSMatt Arsenault; GFX8-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
715*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
716611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
717611212fcSMatt Arsenault; GFX8-NEXT:  .LBB6_1: ; %atomicrmw.start
718611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
719611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
720611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v7, v5
721611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v6, v4
722*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[4:5], v[6:7], v[6:7]
723*eeac0ffaSNikita Popov; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[2:3]
724611212fcSMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
725611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
726611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
727611212fcSMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
728611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
729611212fcSMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
730611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB6_1
731611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
732611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
733611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, v4
734611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, v5
735611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
736611212fcSMatt Arsenault;
737611212fcSMatt Arsenault; GFX7-LABEL: global_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
738611212fcSMatt Arsenault; GFX7:       ; %bb.0:
739611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 s6, 0
741611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 s7, 0xf000
742611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
74341439d5bSMatt Arsenault; GFX7-NEXT:    buffer_atomic_fmin_x2 v[2:3], v[0:1], s[4:7], 0 addr64 glc
744611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0)
745611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
74641439d5bSMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v0, v2
74741439d5bSMatt Arsenault; GFX7-NEXT:    v_mov_b32_e32 v1, v3
748611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
; Operation under test: the atomicrmw result is kept in %result and returned.
749611212fcSMatt Arsenault  %result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
750611212fcSMatt Arsenault  ret double %result
751611212fcSMatt Arsenault}
752611212fcSMatt Arsenault
; Non-returning f64 `atomicrmw fmin` on a global (addrspace(1)) pointer with
; syncscope("agent") and seq_cst ordering, tagged with
; !amdgpu.no.fine.grained.memory. The instruction's result (%unused) is
; discarded and the function returns void.
;
; According to the checks below, the GFX940, GFX10, GFX90A and GFX7 prefixes
; expect a single native f64 min atomic with no destination register
; (global_atomic_min_f64, global_atomic_fmin_x2 or buffer_atomic_fmin_x2),
; while the GFX12, GFX11, GFX908 and GFX8 prefixes expect a compare-and-swap
; retry loop (%atomicrmw.start / %atomicrmw.end) built from a v_min on the
; loaded value followed by a 64-bit cmpswap.
; NOTE(review): the v_max of a value with itself ahead of the v_min is
; presumably input canonicalization -- confirm against the AMDGPU backend.
;
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
753611212fcSMatt Arsenaultdefine void @global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr addrspace(1) %ptr, double %val) {
754611212fcSMatt Arsenault; GFX12-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
755611212fcSMatt Arsenault; GFX12:       ; %bb.0:
756611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
757611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
758611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
759611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
760611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
761*eeac0ffaSNikita Popov; GFX12-NEXT:    global_load_b64 v[4:5], v[0:1], off
762*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[6:7], v[2:3], v[2:3]
763611212fcSMatt Arsenault; GFX12-NEXT:    s_mov_b32 s0, 0
764611212fcSMatt Arsenault; GFX12-NEXT:  .LBB7_1: ; %atomicrmw.start
765611212fcSMatt Arsenault; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
766611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
767*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
768611212fcSMatt Arsenault; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
769*eeac0ffaSNikita Popov; GFX12-NEXT:    v_min_num_f64_e32 v[2:3], v[2:3], v[6:7]
770611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
771*eeac0ffaSNikita Popov; GFX12-NEXT:    global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off th:TH_ATOMIC_RETURN scope:SCOPE_DEV
772611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
773611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
774*eeac0ffaSNikita Popov; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5]
775*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v4, v2
77686627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
777611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 s0, vcc_lo, s0
77886627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
779611212fcSMatt Arsenault; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
780611212fcSMatt Arsenault; GFX12-NEXT:    s_cbranch_execnz .LBB7_1
781611212fcSMatt Arsenault; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
782611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
78386627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
784611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
785611212fcSMatt Arsenault;
786611212fcSMatt Arsenault; GFX940-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
787611212fcSMatt Arsenault; GFX940:       ; %bb.0:
788611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
789611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
79041439d5bSMatt Arsenault; GFX940-NEXT:    global_atomic_min_f64 v[0:1], v[2:3], off
791611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
792611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
793611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
794611212fcSMatt Arsenault;
795611212fcSMatt Arsenault; GFX11-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
796611212fcSMatt Arsenault; GFX11:       ; %bb.0:
797611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798*eeac0ffaSNikita Popov; GFX11-NEXT:    global_load_b64 v[4:5], v[0:1], off
799*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[6:7], v[2:3], v[2:3]
800611212fcSMatt Arsenault; GFX11-NEXT:    s_mov_b32 s0, 0
801611212fcSMatt Arsenault; GFX11-NEXT:  .LBB7_1: ; %atomicrmw.start
802611212fcSMatt Arsenault; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
803611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
804*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
805611212fcSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
806*eeac0ffaSNikita Popov; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
807611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
808*eeac0ffaSNikita Popov; GFX11-NEXT:    global_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5], off glc
809611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
810611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
811611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
812*eeac0ffaSNikita Popov; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5]
813*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v4, v2
814611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 s0, vcc_lo, s0
815611212fcSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
816611212fcSMatt Arsenault; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
817611212fcSMatt Arsenault; GFX11-NEXT:    s_cbranch_execnz .LBB7_1
818611212fcSMatt Arsenault; GFX11-NEXT:  ; %bb.2: ; %atomicrmw.end
819611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
820611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
821611212fcSMatt Arsenault;
822611212fcSMatt Arsenault; GFX10-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
823611212fcSMatt Arsenault; GFX10:       ; %bb.0:
824611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
82641439d5bSMatt Arsenault; GFX10-NEXT:    global_atomic_fmin_x2 v[0:1], v[2:3], off
82741439d5bSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
828611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
829611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
830611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
831611212fcSMatt Arsenault;
832611212fcSMatt Arsenault; GFX90A-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
833611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
834611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83541439d5bSMatt Arsenault; GFX90A-NEXT:    global_atomic_min_f64 v[0:1], v[2:3], off
836611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
837611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
838611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
839611212fcSMatt Arsenault;
840611212fcSMatt Arsenault; GFX908-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
841611212fcSMatt Arsenault; GFX908:       ; %bb.0:
842611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
843*eeac0ffaSNikita Popov; GFX908-NEXT:    global_load_dwordx2 v[4:5], v[0:1], off
844*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[6:7], v[2:3], v[2:3]
845611212fcSMatt Arsenault; GFX908-NEXT:    s_mov_b64 s[4:5], 0
846611212fcSMatt Arsenault; GFX908-NEXT:  .LBB7_1: ; %atomicrmw.start
847611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
848611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
849*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
850*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
851*eeac0ffaSNikita Popov; GFX908-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
852611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
853611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
854*eeac0ffaSNikita Popov; GFX908-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
855*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v5, v3
856611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
857*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v4, v2
858611212fcSMatt Arsenault; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
859611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB7_1
860611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
861611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
862611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
863611212fcSMatt Arsenault;
864611212fcSMatt Arsenault; GFX8-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
865611212fcSMatt Arsenault; GFX8:       ; %bb.0:
866611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
868*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[6:7], v[2:3], v[2:3]
869611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
870611212fcSMatt Arsenault; GFX8-NEXT:  .LBB7_1: ; %atomicrmw.start
871611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
872611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
873*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
874*eeac0ffaSNikita Popov; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
875*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
876611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
877611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
878*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
879*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, v3
880611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
881*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, v2
882611212fcSMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
883611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB7_1
884611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
885611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
886611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
887611212fcSMatt Arsenault;
888611212fcSMatt Arsenault; GFX7-LABEL: global_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
889611212fcSMatt Arsenault; GFX7:       ; %bb.0:
890611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 s6, 0
892611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b32 s7, 0xf000
893611212fcSMatt Arsenault; GFX7-NEXT:    s_mov_b64 s[4:5], 0
89441439d5bSMatt Arsenault; GFX7-NEXT:    buffer_atomic_fmin_x2 v[2:3], v[0:1], s[4:7], 0 addr64
895611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0)
896611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
897611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
; Operation under test: the atomicrmw result is bound to %unused and never read.
898611212fcSMatt Arsenault  %unused = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
899611212fcSMatt Arsenault  ret void
900611212fcSMatt Arsenault}
901611212fcSMatt Arsenault
902611212fcSMatt Arsenaultdefine float @flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr %ptr, float %val) {
903611212fcSMatt Arsenault; GFX12-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
904611212fcSMatt Arsenault; GFX12:       ; %bb.0:
905611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
906611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
907611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
908611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
909611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
910611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
91141439d5bSMatt Arsenault; GFX12-NEXT:    flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV
912611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
913611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
914611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
915611212fcSMatt Arsenault;
916611212fcSMatt Arsenault; GFX940-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
917611212fcSMatt Arsenault; GFX940:       ; %bb.0:
918611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
919611212fcSMatt Arsenault; GFX940-NEXT:    flat_load_dword v3, v[0:1]
920611212fcSMatt Arsenault; GFX940-NEXT:    s_mov_b64 s[0:1], 0
921*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v2, v2, v2
922611212fcSMatt Arsenault; GFX940-NEXT:  .LBB8_1: ; %atomicrmw.start
923611212fcSMatt Arsenault; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
924611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
925611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v5, v3
926*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v3, v5, v5
927*eeac0ffaSNikita Popov; GFX940-NEXT:    v_min_f32_e32 v4, v3, v2
928611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
929611212fcSMatt Arsenault; GFX940-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] sc0
930611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
931611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
932611212fcSMatt Arsenault; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
933611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
934611212fcSMatt Arsenault; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
935611212fcSMatt Arsenault; GFX940-NEXT:    s_cbranch_execnz .LBB8_1
936611212fcSMatt Arsenault; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
937611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
938611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v0, v3
939611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
940611212fcSMatt Arsenault;
941611212fcSMatt Arsenault; GFX11-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
942611212fcSMatt Arsenault; GFX11:       ; %bb.0:
943611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
94541439d5bSMatt Arsenault; GFX11-NEXT:    flat_atomic_min_f32 v0, v[0:1], v2 glc
946611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
947611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
948611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
949611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
950611212fcSMatt Arsenault;
951611212fcSMatt Arsenault; GFX10-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
952611212fcSMatt Arsenault; GFX10:       ; %bb.0:
953611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
95541439d5bSMatt Arsenault; GFX10-NEXT:    flat_atomic_fmin v0, v[0:1], v2 glc
956611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
957611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
958611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
959611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
960611212fcSMatt Arsenault;
961611212fcSMatt Arsenault; GFX90A-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
962611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
963611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
964611212fcSMatt Arsenault; GFX90A-NEXT:    flat_load_dword v3, v[0:1]
965611212fcSMatt Arsenault; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
966*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v2, v2, v2
967611212fcSMatt Arsenault; GFX90A-NEXT:  .LBB8_1: ; %atomicrmw.start
968611212fcSMatt Arsenault; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
969611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
970611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
971*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v3, v5, v5
972*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_min_f32_e32 v4, v3, v2
973611212fcSMatt Arsenault; GFX90A-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[4:5] glc
974611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
975611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
976611212fcSMatt Arsenault; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v5
977611212fcSMatt Arsenault; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
978611212fcSMatt Arsenault; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
979611212fcSMatt Arsenault; GFX90A-NEXT:    s_cbranch_execnz .LBB8_1
980611212fcSMatt Arsenault; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
981611212fcSMatt Arsenault; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
982611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
983611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
984611212fcSMatt Arsenault;
985611212fcSMatt Arsenault; GFX908-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
986611212fcSMatt Arsenault; GFX908:       ; %bb.0:
987611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988611212fcSMatt Arsenault; GFX908-NEXT:    flat_load_dword v3, v[0:1]
989611212fcSMatt Arsenault; GFX908-NEXT:    s_mov_b64 s[4:5], 0
990*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v2, v2, v2
991611212fcSMatt Arsenault; GFX908-NEXT:  .LBB8_1: ; %atomicrmw.start
992611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
993611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
994611212fcSMatt Arsenault; GFX908-NEXT:    v_mov_b32_e32 v4, v3
995*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v3, v4, v4
996*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f32_e32 v3, v3, v2
997611212fcSMatt Arsenault; GFX908-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
998611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
999611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
1000611212fcSMatt Arsenault; GFX908-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
1001611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1002611212fcSMatt Arsenault; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1003611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB8_1
1004611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
1005611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
1006611212fcSMatt Arsenault; GFX908-NEXT:    v_mov_b32_e32 v0, v3
1007611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
1008611212fcSMatt Arsenault;
1009611212fcSMatt Arsenault; GFX8-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1010611212fcSMatt Arsenault; GFX8:       ; %bb.0:
1011611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1012611212fcSMatt Arsenault; GFX8-NEXT:    flat_load_dword v3, v[0:1]
1013611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
1014611212fcSMatt Arsenault; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v2
1015611212fcSMatt Arsenault; GFX8-NEXT:  .LBB8_1: ; %atomicrmw.start
1016611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
1017611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1018611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v4, v3
1019611212fcSMatt Arsenault; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v4
1020611212fcSMatt Arsenault; GFX8-NEXT:    v_min_f32_e32 v3, v3, v2
1021611212fcSMatt Arsenault; GFX8-NEXT:    flat_atomic_cmpswap v3, v[0:1], v[3:4] glc
1022611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1023611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
1024611212fcSMatt Arsenault; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v4
1025611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1026611212fcSMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1027611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB8_1
1028611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
1029611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
1030611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v0, v3
1031611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1032611212fcSMatt Arsenault;
1033611212fcSMatt Arsenault; GFX7-LABEL: flat_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1034611212fcSMatt Arsenault; GFX7:       ; %bb.0:
1035611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103641439d5bSMatt Arsenault; GFX7-NEXT:    flat_atomic_fmin v0, v[0:1], v2 glc
1037611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1038611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
1039611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1040611212fcSMatt Arsenault  %result = atomicrmw fmin ptr %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
1041611212fcSMatt Arsenault  ret float %result
1042611212fcSMatt Arsenault}
1043611212fcSMatt Arsenault
; Flat-pointer atomicrmw fmin on float at agent scope (seq_cst), tagged with
; !amdgpu.no.fine.grained.memory; the result of the atomicrmw is unused.
; Expected selection per check prefix below: a single native flat atomic for
; GFX12, GFX11, GFX10 and GFX7, and a flat_atomic_cmpswap retry loop for
; GFX940, GFX90A, GFX908 and GFX8.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define void @flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr %ptr, float %val) {
; GFX12-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    flat_atomic_min_num_f32 v[0:1], v2 scope:SCOPE_DEV
; GFX12-NEXT:    s_wait_storecnt_dscnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    flat_load_dword v3, v[0:1]
; GFX940-NEXT:    s_mov_b64 s[0:1], 0
; GFX940-NEXT:    v_max_f32_e32 v4, v2, v2
; GFX940-NEXT:  .LBB9_1: ; %atomicrmw.start
; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_max_f32_e32 v2, v3, v3
; GFX940-NEXT:    v_min_f32_e32 v2, v2, v4
; GFX940-NEXT:    buffer_wbl2 sc1
; GFX940-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_inv sc1
; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; GFX940-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
; GFX940-NEXT:    v_mov_b32_e32 v3, v2
; GFX940-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GFX940-NEXT:    s_cbranch_execnz .LBB9_1
; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
; GFX940-NEXT:    s_or_b64 exec, exec, s[0:1]
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    flat_atomic_min_f32 v[0:1], v2
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    buffer_gl1_inv
; GFX11-NEXT:    buffer_gl0_inv
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    flat_atomic_fmin v[0:1], v2
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    buffer_gl1_inv
; GFX10-NEXT:    buffer_gl0_inv
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_load_dword v3, v[0:1]
; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
; GFX90A-NEXT:    v_max_f32_e32 v4, v2, v2
; GFX90A-NEXT:  .LBB9_1: ; %atomicrmw.start
; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_max_f32_e32 v2, v3, v3
; GFX90A-NEXT:    v_min_f32_e32 v2, v2, v4
; GFX90A-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_wbinvl1
; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; GFX90A-NEXT:    s_cbranch_execnz .LBB9_1
; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    flat_load_dword v3, v[0:1]
; GFX908-NEXT:    s_mov_b64 s[4:5], 0
; GFX908-NEXT:    v_max_f32_e32 v4, v2, v2
; GFX908-NEXT:  .LBB9_1: ; %atomicrmw.start
; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_max_f32_e32 v2, v3, v3
; GFX908-NEXT:    v_min_f32_e32 v2, v2, v4
; GFX908-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_wbinvl1
; GFX908-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; GFX908-NEXT:    v_mov_b32_e32 v3, v2
; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; GFX908-NEXT:    s_cbranch_execnz .LBB9_1
; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v3, v[0:1]
; GFX8-NEXT:    s_mov_b64 s[4:5], 0
; GFX8-NEXT:    v_mul_f32_e32 v4, 1.0, v2
; GFX8-NEXT:  .LBB9_1: ; %atomicrmw.start
; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mul_f32_e32 v2, 1.0, v3
; GFX8-NEXT:    v_min_f32_e32 v2, v2, v4
; GFX8-NEXT:    flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_wbinvl1
; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, v2, v3
; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; GFX8-NEXT:    v_mov_b32_e32 v3, v2
; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; GFX8-NEXT:    s_cbranch_execnz .LBB9_1
; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: flat_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_atomic_fmin v[0:1], v2
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %unused = atomicrmw fmin ptr %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
  ret void
}
1183611212fcSMatt Arsenault
; Flat-pointer atomicrmw fmin on double at agent scope (seq_cst), tagged with
; !noalias.addrspace and !amdgpu.no.fine.grained.memory; the result of the
; atomicrmw is returned.
; Expected selection per check prefix below: a single native flat atomic for
; GFX940, GFX90A, GFX10 and GFX7, and a 64-bit flat_atomic_cmpswap retry loop
; for GFX12, GFX11, GFX908 and GFX8.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define double @flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr %ptr, double %val) {
; GFX12-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    flat_load_b64 v[4:5], v[0:1]
; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[2:3], v[2:3]
; GFX12-NEXT:    s_mov_b32 s0, 0
; GFX12-NEXT:  .LBB10_1: ; %atomicrmw.start
; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v6, v4
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[6:7], v[6:7]
; GFX12-NEXT:    v_min_num_f64_e32 v[4:5], v[4:5], v[2:3]
; GFX12-NEXT:    s_wait_storecnt 0x0
; GFX12-NEXT:    flat_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    global_inv scope:SCOPE_DEV
; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7]
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_or_b32 s0, vcc_lo, s0
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
; GFX12-NEXT:    s_cbranch_execnz .LBB10_1
; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GFX12-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX12-NEXT:    s_wait_alu 0xfffe
; GFX12-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_wbl2 sc1
; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_inv sc1
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    flat_load_b64 v[4:5], v[0:1]
; GFX11-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:  .LBB10_1: ; %atomicrmw.start
; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v6, v4
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_max_f64 v[4:5], v[6:7], v[6:7]
; GFX11-NEXT:    v_min_f64 v[4:5], v[4:5], v[2:3]
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    flat_atomic_cmpswap_b64 v[4:5], v[0:1], v[4:7] glc
; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_gl1_inv
; GFX11-NEXT:    buffer_gl0_inv
; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[4:5], v[6:7]
; GFX11-NEXT:    s_or_b32 s0, vcc_lo, s0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
; GFX11-NEXT:    s_cbranch_execnz .LBB10_1
; GFX11-NEXT:  ; %bb.2: ; %atomicrmw.end
; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
; GFX11-NEXT:    v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5
; GFX11-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    flat_atomic_fmin_x2 v[0:1], v[0:1], v[2:3] glc
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_gl1_inv
; GFX10-NEXT:    buffer_gl0_inv
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_wbinvl1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX908-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
; GFX908-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX908-NEXT:    s_mov_b64 s[4:5], 0
; GFX908-NEXT:  .LBB10_1: ; %atomicrmw.start
; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX908-NEXT:    v_mov_b32_e32 v7, v5
; GFX908-NEXT:    v_mov_b32_e32 v6, v4
; GFX908-NEXT:    v_max_f64 v[4:5], v[6:7], v[6:7]
; GFX908-NEXT:    v_min_f64 v[4:5], v[4:5], v[2:3]
; GFX908-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_wbinvl1
; GFX908-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; GFX908-NEXT:    s_cbranch_execnz .LBB10_1
; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX908-NEXT:    v_mov_b32_e32 v0, v4
; GFX908-NEXT:    v_mov_b32_e32 v1, v5
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 4, v0
; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
; GFX8-NEXT:    flat_load_dword v4, v[0:1]
; GFX8-NEXT:    flat_load_dword v5, v[5:6]
; GFX8-NEXT:    v_max_f64 v[2:3], v[2:3], v[2:3]
; GFX8-NEXT:    s_mov_b64 s[4:5], 0
; GFX8-NEXT:  .LBB10_1: ; %atomicrmw.start
; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v7, v5
; GFX8-NEXT:    v_mov_b32_e32 v6, v4
; GFX8-NEXT:    v_max_f64 v[4:5], v[6:7], v[6:7]
; GFX8-NEXT:    v_min_f64 v[4:5], v[4:5], v[2:3]
; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_wbinvl1
; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; GFX8-NEXT:    s_cbranch_execnz .LBB10_1
; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT:    v_mov_b32_e32 v0, v4
; GFX8-NEXT:    v_mov_b32_e32 v1, v5
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: flat_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_atomic_fmin_x2 v[0:1], v[0:1], v[2:3] glc
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_wbinvl1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %result = atomicrmw fmin ptr %ptr, double %val syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
  ret double %result
}
1338611212fcSMatt Arsenault
1339611212fcSMatt Arsenaultdefine void @flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr %ptr, double %val) {
1340611212fcSMatt Arsenault; GFX12-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1341611212fcSMatt Arsenault; GFX12:       ; %bb.0:
1342611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1343611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
1344611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
1345611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
1346611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
1347*eeac0ffaSNikita Popov; GFX12-NEXT:    flat_load_b64 v[4:5], v[0:1]
1348*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[6:7], v[2:3], v[2:3]
1349611212fcSMatt Arsenault; GFX12-NEXT:    s_mov_b32 s0, 0
1350611212fcSMatt Arsenault; GFX12-NEXT:  .LBB11_1: ; %atomicrmw.start
1351611212fcSMatt Arsenault; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
1352611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1353*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[2:3], v[4:5], v[4:5]
1354611212fcSMatt Arsenault; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1355*eeac0ffaSNikita Popov; GFX12-NEXT:    v_min_num_f64_e32 v[2:3], v[2:3], v[6:7]
1356611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
1357*eeac0ffaSNikita Popov; GFX12-NEXT:    flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] th:TH_ATOMIC_RETURN scope:SCOPE_DEV
1358611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1359611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
1360*eeac0ffaSNikita Popov; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5]
1361*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v4, v2
136286627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
1363611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 s0, vcc_lo, s0
136486627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
1365611212fcSMatt Arsenault; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
1366611212fcSMatt Arsenault; GFX12-NEXT:    s_cbranch_execnz .LBB11_1
1367611212fcSMatt Arsenault; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
1368611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s0
136986627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
1370611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
1371611212fcSMatt Arsenault;
1372611212fcSMatt Arsenault; GFX940-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1373611212fcSMatt Arsenault; GFX940:       ; %bb.0:
1374611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1375611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
137641439d5bSMatt Arsenault; GFX940-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
1377611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1378611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
1379611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
1380611212fcSMatt Arsenault;
1381611212fcSMatt Arsenault; GFX11-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1382611212fcSMatt Arsenault; GFX11:       ; %bb.0:
1383611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1384*eeac0ffaSNikita Popov; GFX11-NEXT:    flat_load_b64 v[4:5], v[0:1]
1385*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[6:7], v[2:3], v[2:3]
1386611212fcSMatt Arsenault; GFX11-NEXT:    s_mov_b32 s0, 0
1387611212fcSMatt Arsenault; GFX11-NEXT:  .LBB11_1: ; %atomicrmw.start
1388611212fcSMatt Arsenault; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
1389611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1390*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
1391611212fcSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1392*eeac0ffaSNikita Popov; GFX11-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
1393611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1394*eeac0ffaSNikita Popov; GFX11-NEXT:    flat_atomic_cmpswap_b64 v[2:3], v[0:1], v[2:5] glc
1395611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1396611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
1397611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
1398*eeac0ffaSNikita Popov; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[2:3], v[4:5]
1399*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v5, v3 :: v_dual_mov_b32 v4, v2
1400611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 s0, vcc_lo, s0
1401611212fcSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1402611212fcSMatt Arsenault; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s0
1403611212fcSMatt Arsenault; GFX11-NEXT:    s_cbranch_execnz .LBB11_1
1404611212fcSMatt Arsenault; GFX11-NEXT:  ; %bb.2: ; %atomicrmw.end
1405611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1406611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
1407611212fcSMatt Arsenault;
1408611212fcSMatt Arsenault; GFX10-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1409611212fcSMatt Arsenault; GFX10:       ; %bb.0:
1410611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1411611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
141241439d5bSMatt Arsenault; GFX10-NEXT:    flat_atomic_fmin_x2 v[0:1], v[2:3]
141341439d5bSMatt Arsenault; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
141441439d5bSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1415611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
1416611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
1417611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
1418611212fcSMatt Arsenault;
1419611212fcSMatt Arsenault; GFX90A-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1420611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
1421611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142241439d5bSMatt Arsenault; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[2:3]
1423611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1424611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
1425611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1426611212fcSMatt Arsenault;
1427611212fcSMatt Arsenault; GFX908-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1428611212fcSMatt Arsenault; GFX908:       ; %bb.0:
1429611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430*eeac0ffaSNikita Popov; GFX908-NEXT:    flat_load_dwordx2 v[4:5], v[0:1]
1431*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[6:7], v[2:3], v[2:3]
1432611212fcSMatt Arsenault; GFX908-NEXT:    s_mov_b64 s[4:5], 0
1433611212fcSMatt Arsenault; GFX908-NEXT:  .LBB11_1: ; %atomicrmw.start
1434611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
1435611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1436*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
1437*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
1438*eeac0ffaSNikita Popov; GFX908-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
1439611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1440611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
1441*eeac0ffaSNikita Popov; GFX908-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
1442*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v5, v3
1443611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1444*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v4, v2
1445611212fcSMatt Arsenault; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1446611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB11_1
1447611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
1448611212fcSMatt Arsenault; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
1449611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
1450611212fcSMatt Arsenault;
1451611212fcSMatt Arsenault; GFX8-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1452611212fcSMatt Arsenault; GFX8:       ; %bb.0:
1453611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1454*eeac0ffaSNikita Popov; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 4, v0
1455*eeac0ffaSNikita Popov; GFX8-NEXT:    v_addc_u32_e32 v6, vcc, 0, v1, vcc
1456*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dword v4, v[0:1]
1457*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_load_dword v5, v[5:6]
1458*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[6:7], v[2:3], v[2:3]
1459611212fcSMatt Arsenault; GFX8-NEXT:    s_mov_b64 s[4:5], 0
1460611212fcSMatt Arsenault; GFX8-NEXT:  .LBB11_1: ; %atomicrmw.start
1461611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
1462611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1463*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[2:3], v[4:5], v[4:5]
1464*eeac0ffaSNikita Popov; GFX8-NEXT:    v_min_f64 v[2:3], v[2:3], v[6:7]
1465*eeac0ffaSNikita Popov; GFX8-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
1466611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1467611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
1468*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
1469*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, v3
1470611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1471*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, v2
1472611212fcSMatt Arsenault; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1473611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB11_1
1474611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
1475611212fcSMatt Arsenault; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
1476611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1477611212fcSMatt Arsenault;
1478611212fcSMatt Arsenault; GFX7-LABEL: flat_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1479611212fcSMatt Arsenault; GFX7:       ; %bb.0:
1480611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148141439d5bSMatt Arsenault; GFX7-NEXT:    flat_atomic_fmin_x2 v[0:1], v[2:3]
1482611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1483611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
1484611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
14851d037087SMatt Arsenault  %unused = atomicrmw fmin ptr %ptr, double %val syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1486611212fcSMatt Arsenault  ret void
1487611212fcSMatt Arsenault}
1488611212fcSMatt Arsenault
; Returning f32 case: atomicrmw fmin through an inreg addrspace(7) pointer at
; syncscope("agent"), seq_cst, tagged !amdgpu.no.fine.grained.memory; the old
; value is returned to the caller.
; Per the assertions below, the gfx1200, gfx1100, gfx1010 and hawaii runs select
; a single returning buffer float-min atomic (buffer_atomic_min_num_f32,
; buffer_atomic_min_f32 or buffer_atomic_fmin), while the gfx940, gfx90a, gfx908
; and tonga runs load the current value once and then loop on
; buffer_atomic_cmpswap until the returned value equals the expected one.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1489611212fcSMatt Arsenaultdefine float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory(ptr addrspace(7) inreg %ptr, float %val) {
1490611212fcSMatt Arsenault; GFX12-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1491611212fcSMatt Arsenault; GFX12:       ; %bb.0:
1492611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1493611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
1494611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
1495611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
1496611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
14976548b635SShilei Tian; GFX12-NEXT:    v_mov_b32_e32 v1, s16
1498611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
149941439d5bSMatt Arsenault; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
1500611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
1501611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
1502611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
1503611212fcSMatt Arsenault;
1504611212fcSMatt Arsenault; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1505611212fcSMatt Arsenault; GFX940:       ; %bb.0:
1506611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1507*eeac0ffaSNikita Popov; GFX940-NEXT:    v_mov_b32_e32 v2, s16
1508*eeac0ffaSNikita Popov; GFX940-NEXT:    v_mov_b32_e32 v1, v0
1509*eeac0ffaSNikita Popov; GFX940-NEXT:    buffer_load_dword v0, v2, s[0:3], 0 offen
1510611212fcSMatt Arsenault; GFX940-NEXT:    s_mov_b64 s[4:5], 0
1511*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v3, v1, v1
1512611212fcSMatt Arsenault; GFX940-NEXT:  .LBB12_1: ; %atomicrmw.start
1513611212fcSMatt Arsenault; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
1514611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
1515611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b32_e32 v5, v0
1516*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v0, v5, v5
1517*eeac0ffaSNikita Popov; GFX940-NEXT:    v_min_f32_e32 v4, v0, v3
1518611212fcSMatt Arsenault; GFX940-NEXT:    v_mov_b64_e32 v[0:1], v[4:5]
1519611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
1520*eeac0ffaSNikita Popov; GFX940-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[0:3], 0 offen sc0
1521611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
1522611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
1523611212fcSMatt Arsenault; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
1524611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1525611212fcSMatt Arsenault; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1526611212fcSMatt Arsenault; GFX940-NEXT:    s_cbranch_execnz .LBB12_1
1527611212fcSMatt Arsenault; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
1528611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
1529611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
1530611212fcSMatt Arsenault;
1531611212fcSMatt Arsenault; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1532611212fcSMatt Arsenault; GFX11:       ; %bb.0:
1533611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15346548b635SShilei Tian; GFX11-NEXT:    v_mov_b32_e32 v1, s16
1535611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
153641439d5bSMatt Arsenault; GFX11-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
1537611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
1538611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
1539611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
1540611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
1541611212fcSMatt Arsenault;
1542611212fcSMatt Arsenault; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1543611212fcSMatt Arsenault; GFX10:       ; %bb.0:
1544611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15456548b635SShilei Tian; GFX10-NEXT:    v_mov_b32_e32 v1, s20
1546611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
15476548b635SShilei Tian; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], 0 offen glc
1548611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0)
1549611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
1550611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
1551611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
1552611212fcSMatt Arsenault;
1553611212fcSMatt Arsenault; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1554611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
1555611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_mov_b32_e32 v2, s20
1557*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
1558*eeac0ffaSNikita Popov; GFX90A-NEXT:    buffer_load_dword v0, v2, s[16:19], 0 offen
15596548b635SShilei Tian; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
1560*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v3, v1, v1
1561611212fcSMatt Arsenault; GFX90A-NEXT:  .LBB12_1: ; %atomicrmw.start
1562611212fcSMatt Arsenault; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
1563611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1564611212fcSMatt Arsenault; GFX90A-NEXT:    v_mov_b32_e32 v5, v0
1565*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v0, v5, v5
1566*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_min_f32_e32 v4, v0, v3
1567611212fcSMatt Arsenault; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], v[4:5], v[4:5] op_sel:[0,1]
1568*eeac0ffaSNikita Popov; GFX90A-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
1569611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1570611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
1571611212fcSMatt Arsenault; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
15726548b635SShilei Tian; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
15736548b635SShilei Tian; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1574611212fcSMatt Arsenault; GFX90A-NEXT:    s_cbranch_execnz .LBB12_1
1575611212fcSMatt Arsenault; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
15766548b635SShilei Tian; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
1577611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1578611212fcSMatt Arsenault;
1579611212fcSMatt Arsenault; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1580611212fcSMatt Arsenault; GFX908:       ; %bb.0:
1581611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1582*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v2, s20
1583*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v1, v0
1584*eeac0ffaSNikita Popov; GFX908-NEXT:    buffer_load_dword v0, v2, s[16:19], 0 offen
15856548b635SShilei Tian; GFX908-NEXT:    s_mov_b64 s[4:5], 0
1586*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v3, v1, v1
1587611212fcSMatt Arsenault; GFX908-NEXT:  .LBB12_1: ; %atomicrmw.start
1588611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
1589611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
1590*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v5, v0
1591*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v0, v5, v5
1592*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f32_e32 v4, v0, v3
1593*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v0, v4
1594*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v1, v5
1595*eeac0ffaSNikita Popov; GFX908-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
1596611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
1597611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
1598*eeac0ffaSNikita Popov; GFX908-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
15996548b635SShilei Tian; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
16006548b635SShilei Tian; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1601611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB12_1
1602611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
16036548b635SShilei Tian; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
1604611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
1605611212fcSMatt Arsenault;
1606611212fcSMatt Arsenault; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1607611212fcSMatt Arsenault; GFX8:       ; %bb.0:
1608611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1609*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, s20
1610611212fcSMatt Arsenault; GFX8-NEXT:    v_mov_b32_e32 v1, v0
1611*eeac0ffaSNikita Popov; GFX8-NEXT:    buffer_load_dword v0, v2, s[16:19], 0 offen
16126548b635SShilei Tian; GFX8-NEXT:    s_mov_b64 s[4:5], 0
1613*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v1
1614611212fcSMatt Arsenault; GFX8-NEXT:  .LBB12_1: ; %atomicrmw.start
1615611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
1616611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
1617*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, v0
1618*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v5
1619*eeac0ffaSNikita Popov; GFX8-NEXT:    v_min_f32_e32 v4, v0, v3
1620*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v0, v4
1621*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, v5
1622*eeac0ffaSNikita Popov; GFX8-NEXT:    buffer_atomic_cmpswap v[0:1], v2, s[16:19], 0 offen glc
1623611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
1624611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
1625*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, v0, v5
16266548b635SShilei Tian; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
16276548b635SShilei Tian; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1628611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB12_1
1629611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
16306548b635SShilei Tian; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
1631611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1632611212fcSMatt Arsenault;
1633611212fcSMatt Arsenault; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__amdgpu_no_fine_grained_memory:
1634611212fcSMatt Arsenault; GFX7:       ; %bb.0:
1635611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16366548b635SShilei Tian; GFX7-NEXT:    v_mov_b32_e32 v1, s20
16376548b635SShilei Tian; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], 0 offen glc
1638611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0)
1639611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
1640611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1641611212fcSMatt Arsenault  %result = atomicrmw fmin ptr addrspace(7) %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
1642611212fcSMatt Arsenault  ret float %result
1643611212fcSMatt Arsenault}
1644611212fcSMatt Arsenault
; No-return f32 case: the same atomicrmw fmin through an inreg addrspace(7)
; pointer at syncscope("agent"), seq_cst, tagged
; !amdgpu.no.fine.grained.memory, but the result (%unused) is dropped and the
; function returns void.
; Per the assertions below, the gfx1200, gfx1100, gfx1010 and hawaii runs select
; a single buffer float-min atomic without a return modifier (no glc and no
; TH_ATOMIC_RETURN), while the gfx940, gfx90a, gfx908 and tonga runs load the
; current value once and then loop on a returning buffer_atomic_cmpswap,
; feeding the returned value back in as the next expected value.
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
1645611212fcSMatt Arsenaultdefine void @buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory(ptr addrspace(7) inreg %ptr, float %val) {
1646611212fcSMatt Arsenault; GFX12-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
1647611212fcSMatt Arsenault; GFX12:       ; %bb.0:
1648611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1649611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
1650611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
1651611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
1652611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
16536548b635SShilei Tian; GFX12-NEXT:    v_mov_b32_e32 v1, s16
1654611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
165541439d5bSMatt Arsenault; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen
165641439d5bSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
1657611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
1658611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
1659611212fcSMatt Arsenault;
1660611212fcSMatt Arsenault; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
1661611212fcSMatt Arsenault; GFX940:       ; %bb.0:
1662611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1663*eeac0ffaSNikita Popov; GFX940-NEXT:    v_mov_b32_e32 v2, s16
1664*eeac0ffaSNikita Popov; GFX940-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 offen
1665611212fcSMatt Arsenault; GFX940-NEXT:    s_mov_b64 s[4:5], 0
1666*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v3, v0, v0
1667611212fcSMatt Arsenault; GFX940-NEXT:  .LBB13_1: ; %atomicrmw.start
1668611212fcSMatt Arsenault; GFX940-NEXT:    ; =>This Inner Loop Header: Depth=1
1669611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
1670*eeac0ffaSNikita Popov; GFX940-NEXT:    v_max_f32_e32 v0, v1, v1
1671*eeac0ffaSNikita Popov; GFX940-NEXT:    v_min_f32_e32 v0, v0, v3
1672*eeac0ffaSNikita Popov; GFX940-NEXT:    v_mov_b64_e32 v[4:5], v[0:1]
1673611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
1674*eeac0ffaSNikita Popov; GFX940-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0
1675611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
1676611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
1677*eeac0ffaSNikita Popov; GFX940-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
1678611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1679*eeac0ffaSNikita Popov; GFX940-NEXT:    v_mov_b32_e32 v1, v4
1680611212fcSMatt Arsenault; GFX940-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1681611212fcSMatt Arsenault; GFX940-NEXT:    s_cbranch_execnz .LBB13_1
1682611212fcSMatt Arsenault; GFX940-NEXT:  ; %bb.2: ; %atomicrmw.end
1683611212fcSMatt Arsenault; GFX940-NEXT:    s_or_b64 exec, exec, s[4:5]
1684611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
1685611212fcSMatt Arsenault;
1686611212fcSMatt Arsenault; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
1687611212fcSMatt Arsenault; GFX11:       ; %bb.0:
1688611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16896548b635SShilei Tian; GFX11-NEXT:    v_mov_b32_e32 v1, s16
1690611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
169141439d5bSMatt Arsenault; GFX11-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
169241439d5bSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1693611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
1694611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
1695611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
1696611212fcSMatt Arsenault;
1697611212fcSMatt Arsenault; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
1698611212fcSMatt Arsenault; GFX10:       ; %bb.0:
1699611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17006548b635SShilei Tian; GFX10-NEXT:    v_mov_b32_e32 v1, s20
1701611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
17026548b635SShilei Tian; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], 0 offen
170341439d5bSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1704611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
1705611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
1706611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
1707611212fcSMatt Arsenault;
1708611212fcSMatt Arsenault; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
1709611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
1710611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1711*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_mov_b32_e32 v2, s20
1712*eeac0ffaSNikita Popov; GFX90A-NEXT:    buffer_load_dword v1, v2, s[16:19], 0 offen
17136548b635SShilei Tian; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
1714*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v3, v0, v0
1715611212fcSMatt Arsenault; GFX90A-NEXT:  .LBB13_1: ; %atomicrmw.start
1716611212fcSMatt Arsenault; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
1717611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1718*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_max_f32_e32 v0, v1, v1
1719*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_min_f32_e32 v0, v0, v3
1720*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1]
1721*eeac0ffaSNikita Popov; GFX90A-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
1722611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1723611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
1724*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
17256548b635SShilei Tian; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1726*eeac0ffaSNikita Popov; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
17276548b635SShilei Tian; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1728611212fcSMatt Arsenault; GFX90A-NEXT:    s_cbranch_execnz .LBB13_1
1729611212fcSMatt Arsenault; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
17306548b635SShilei Tian; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
1731611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1732611212fcSMatt Arsenault;
1733611212fcSMatt Arsenault; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
1734611212fcSMatt Arsenault; GFX908:       ; %bb.0:
1735611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1736*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v2, s20
1737*eeac0ffaSNikita Popov; GFX908-NEXT:    buffer_load_dword v1, v2, s[16:19], 0 offen
17386548b635SShilei Tian; GFX908-NEXT:    s_mov_b64 s[4:5], 0
1739*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v3, v0, v0
1740611212fcSMatt Arsenault; GFX908-NEXT:  .LBB13_1: ; %atomicrmw.start
1741611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
1742611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
1743*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f32_e32 v0, v1, v1
1744*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f32_e32 v0, v0, v3
1745*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v5, v1
1746*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v4, v0
1747*eeac0ffaSNikita Popov; GFX908-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
1748611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
1749611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
1750*eeac0ffaSNikita Popov; GFX908-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
17516548b635SShilei Tian; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1752*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v1, v4
17536548b635SShilei Tian; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1754611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB13_1
1755611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
17566548b635SShilei Tian; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
1757611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
1758611212fcSMatt Arsenault;
1759611212fcSMatt Arsenault; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
1760611212fcSMatt Arsenault; GFX8:       ; %bb.0:
1761611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1762*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, s20
1763*eeac0ffaSNikita Popov; GFX8-NEXT:    buffer_load_dword v1, v2, s[16:19], 0 offen
17646548b635SShilei Tian; GFX8-NEXT:    s_mov_b64 s[4:5], 0
1765*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mul_f32_e32 v3, 1.0, v0
1766611212fcSMatt Arsenault; GFX8-NEXT:  .LBB13_1: ; %atomicrmw.start
1767611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
1768611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
1769611212fcSMatt Arsenault; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v1
1770*eeac0ffaSNikita Popov; GFX8-NEXT:    v_min_f32_e32 v0, v0, v3
1771*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v5, v1
1772*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v4, v0
1773*eeac0ffaSNikita Popov; GFX8-NEXT:    buffer_atomic_cmpswap v[4:5], v2, s[16:19], 0 offen glc
1774611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
1775611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
1776*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, v4, v1
17776548b635SShilei Tian; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
1778*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, v4
17796548b635SShilei Tian; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1780611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB13_1
1781611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
17826548b635SShilei Tian; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
1783611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1784611212fcSMatt Arsenault;
1785611212fcSMatt Arsenault; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f32__amdgpu_no_fine_grained_memory:
1786611212fcSMatt Arsenault; GFX7:       ; %bb.0:
1787611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17886548b635SShilei Tian; GFX7-NEXT:    v_mov_b32_e32 v1, s20
17896548b635SShilei Tian; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[16:19], 0 offen
1790611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0)
1791611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
1792611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1793611212fcSMatt Arsenault  %unused = atomicrmw fmin ptr addrspace(7) %ptr, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
1794611212fcSMatt Arsenault  ret void
1795611212fcSMatt Arsenault}
1796611212fcSMatt Arsenault
1797611212fcSMatt Arsenaultdefine double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory(ptr addrspace(7) inreg %ptr, double %val) {
1798611212fcSMatt Arsenault; GFX12-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
1799611212fcSMatt Arsenault; GFX12:       ; %bb.0:
1800611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1801611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
1802611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
1803611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
1804611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
1805*eeac0ffaSNikita Popov; GFX12-NEXT:    v_mov_b32_e32 v6, s16
1806*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v3, v1
1807611212fcSMatt Arsenault; GFX12-NEXT:    s_mov_b32 s4, 0
1808*eeac0ffaSNikita Popov; GFX12-NEXT:    buffer_load_b64 v[0:1], v6, s[0:3], null offen
1809*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[2:3], v[2:3]
1810611212fcSMatt Arsenault; GFX12-NEXT:  .LBB14_1: ; %atomicrmw.start
1811611212fcSMatt Arsenault; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
1812611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
1813*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
1814611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
1815*eeac0ffaSNikita Popov; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1816*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[9:10], v[9:10]
1817*eeac0ffaSNikita Popov; GFX12-NEXT:    v_min_num_f64_e32 v[7:8], v[0:1], v[4:5]
1818*eeac0ffaSNikita Popov; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1819*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1820*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
1821*eeac0ffaSNikita Popov; GFX12-NEXT:    buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
1822611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
1823611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
1824*eeac0ffaSNikita Popov; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
182586627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
1826611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 s4, vcc_lo, s4
182786627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
1828611212fcSMatt Arsenault; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
1829611212fcSMatt Arsenault; GFX12-NEXT:    s_cbranch_execnz .LBB14_1
1830611212fcSMatt Arsenault; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
1831611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
183286627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
1833611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
1834611212fcSMatt Arsenault;
1835611212fcSMatt Arsenault; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
1836611212fcSMatt Arsenault; GFX940:       ; %bb.0:
1837611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18386548b635SShilei Tian; GFX940-NEXT:    v_mov_b32_e32 v2, s16
1839611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
184041439d5bSMatt Arsenault; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
1841611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
1842611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
1843611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
1844611212fcSMatt Arsenault;
1845611212fcSMatt Arsenault; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
1846611212fcSMatt Arsenault; GFX11:       ; %bb.0:
1847611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1848*eeac0ffaSNikita Popov; GFX11-NEXT:    v_mov_b32_e32 v6, s16
1849*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v2, v0 :: v_dual_mov_b32 v3, v1
1850611212fcSMatt Arsenault; GFX11-NEXT:    s_mov_b32 s4, 0
1851*eeac0ffaSNikita Popov; GFX11-NEXT:    buffer_load_b64 v[0:1], v6, s[0:3], 0 offen
1852*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[4:5], v[2:3], v[2:3]
1853611212fcSMatt Arsenault; GFX11-NEXT:  .LBB14_1: ; %atomicrmw.start
1854611212fcSMatt Arsenault; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
1855611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
1856*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0
1857611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1858*eeac0ffaSNikita Popov; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1859*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[0:1], v[9:10], v[9:10]
1860*eeac0ffaSNikita Popov; GFX11-NEXT:    v_min_f64 v[7:8], v[0:1], v[4:5]
1861*eeac0ffaSNikita Popov; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1862*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
1863*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
1864*eeac0ffaSNikita Popov; GFX11-NEXT:    buffer_atomic_cmpswap_b64 v[0:3], v6, s[0:3], 0 offen glc
1865611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
1866611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
1867611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
1868*eeac0ffaSNikita Popov; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[9:10]
1869611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 s4, vcc_lo, s4
1870611212fcSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1871611212fcSMatt Arsenault; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
1872611212fcSMatt Arsenault; GFX11-NEXT:    s_cbranch_execnz .LBB14_1
1873611212fcSMatt Arsenault; GFX11-NEXT:  ; %bb.2: ; %atomicrmw.end
1874611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s4
1875611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
1876611212fcSMatt Arsenault;
1877611212fcSMatt Arsenault; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
1878611212fcSMatt Arsenault; GFX10:       ; %bb.0:
1879611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18806548b635SShilei Tian; GFX10-NEXT:    v_mov_b32_e32 v2, s20
1881611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
18826548b635SShilei Tian; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[16:19], 0 offen glc
1883611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0)
1884611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
1885611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
1886611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
1887611212fcSMatt Arsenault;
1888611212fcSMatt Arsenault; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
1889611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
1890611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18916548b635SShilei Tian; GFX90A-NEXT:    v_mov_b32_e32 v2, s20
18926548b635SShilei Tian; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[16:19], 0 offen glc
1893611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1894611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
1895611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1896611212fcSMatt Arsenault;
1897611212fcSMatt Arsenault; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
1898611212fcSMatt Arsenault; GFX908:       ; %bb.0:
1899611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1900*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v6, s20
1901*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v2, v0
1902*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v3, v1
1903*eeac0ffaSNikita Popov; GFX908-NEXT:    buffer_load_dwordx2 v[0:1], v6, s[16:19], 0 offen
1904*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[4:5], v[2:3], v[2:3]
19056548b635SShilei Tian; GFX908-NEXT:    s_mov_b64 s[4:5], 0
1906611212fcSMatt Arsenault; GFX908-NEXT:  .LBB14_1: ; %atomicrmw.start
1907611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
1908611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
1909*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v10, v1
1910*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v9, v0
1911*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[0:1], v[9:10], v[9:10]
1912*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f64 v[7:8], v[0:1], v[4:5]
1913*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v0, v7
1914*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v1, v8
1915*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v2, v9
1916*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v3, v10
1917*eeac0ffaSNikita Popov; GFX908-NEXT:    buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
1918611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
1919611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
1920*eeac0ffaSNikita Popov; GFX908-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10]
19216548b635SShilei Tian; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
19226548b635SShilei Tian; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1923611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB14_1
1924611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
19256548b635SShilei Tian; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
1926611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
1927611212fcSMatt Arsenault;
1928611212fcSMatt Arsenault; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
1929611212fcSMatt Arsenault; GFX8:       ; %bb.0:
1930611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1931*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s20
1932*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, v0
1933*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v3, v1
1934*eeac0ffaSNikita Popov; GFX8-NEXT:    buffer_load_dwordx2 v[0:1], v6, s[16:19], 0 offen
1935*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[4:5], v[2:3], v[2:3]
19366548b635SShilei Tian; GFX8-NEXT:    s_mov_b64 s[4:5], 0
1937611212fcSMatt Arsenault; GFX8-NEXT:  .LBB14_1: ; %atomicrmw.start
1938611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
1939611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
1940*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v10, v1
1941*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v0
1942*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[0:1], v[9:10], v[9:10]
1943*eeac0ffaSNikita Popov; GFX8-NEXT:    v_min_f64 v[7:8], v[0:1], v[4:5]
1944*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v0, v7
1945*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v1, v8
1946*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, v9
1947*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v3, v10
1948*eeac0ffaSNikita Popov; GFX8-NEXT:    buffer_atomic_cmpswap_x2 v[0:3], v6, s[16:19], 0 offen glc
1949611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
1950611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
1951*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[9:10]
19526548b635SShilei Tian; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
19536548b635SShilei Tian; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
1954611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB14_1
1955611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
19566548b635SShilei Tian; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
1957611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
1958611212fcSMatt Arsenault;
1959611212fcSMatt Arsenault; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__amdgpu_no_fine_grained_memory:
1960611212fcSMatt Arsenault; GFX7:       ; %bb.0:
1961611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19626548b635SShilei Tian; GFX7-NEXT:    v_mov_b32_e32 v2, s20
19636548b635SShilei Tian; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[16:19], 0 offen glc
1964611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0)
1965611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
1966611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
1967611212fcSMatt Arsenault  %result = atomicrmw fmin ptr addrspace(7) %ptr, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
1968611212fcSMatt Arsenault  ret double %result
1969611212fcSMatt Arsenault}
1970611212fcSMatt Arsenault
; No-return f64 fmin on a buffer fat pointer (ptr addrspace(7), passed inreg)
; at agent scope with seq_cst ordering, tagged !amdgpu.no.fine.grained.memory.
; The atomicrmw result is unused and the function returns void.
;
; What the checks below expect per -mcpu run line:
;   - gfx940, gfx1010, gfx90a, hawaii: a single buffer f64 min atomic
;     (buffer_atomic_min_f64 or buffer_atomic_fmin_x2) with no glc/sc0 bit.
;   - gfx1200, gfx1100, gfx908, tonga: an initial buffer load followed by a
;     v_max/v_min + buffer_atomic_cmpswap retry loop (.LBB15_1); lanes leave
;     the loop once the cmpswap returns the value that was previously loaded.
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
1971611212fcSMatt Arsenaultdefine void @buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory(ptr addrspace(7) inreg %ptr, double %val) {
1972611212fcSMatt Arsenault; GFX12-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
1973611212fcSMatt Arsenault; GFX12:       ; %bb.0:
1974611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
1975611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_expcnt 0x0
1976611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_samplecnt 0x0
1977611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_bvhcnt 0x0
1978611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_kmcnt 0x0
1979*eeac0ffaSNikita Popov; GFX12-NEXT:    v_mov_b32_e32 v6, s16
1980*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[4:5], v[0:1], v[0:1]
1981611212fcSMatt Arsenault; GFX12-NEXT:    s_mov_b32 s4, 0
1982*eeac0ffaSNikita Popov; GFX12-NEXT:    buffer_load_b64 v[2:3], v6, s[0:3], null offen
1983611212fcSMatt Arsenault; GFX12-NEXT:  .LBB15_1: ; %atomicrmw.start
1984611212fcSMatt Arsenault; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
1985611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
1986*eeac0ffaSNikita Popov; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], v[2:3], v[2:3]
1987611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_storecnt 0x0
1988924a64a3SPierre van Houtryve; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1989*eeac0ffaSNikita Popov; GFX12-NEXT:    v_min_num_f64_e32 v[0:1], v[0:1], v[4:5]
1990*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
1991*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
1992*eeac0ffaSNikita Popov; GFX12-NEXT:    buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], null offen th:TH_ATOMIC_RETURN
1993611212fcSMatt Arsenault; GFX12-NEXT:    s_wait_loadcnt 0x0
1994611212fcSMatt Arsenault; GFX12-NEXT:    global_inv scope:SCOPE_DEV
1995*eeac0ffaSNikita Popov; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3]
1996*eeac0ffaSNikita Popov; GFX12-NEXT:    v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8
199786627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
1998611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 s4, vcc_lo, s4
199986627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
2000611212fcSMatt Arsenault; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
2001611212fcSMatt Arsenault; GFX12-NEXT:    s_cbranch_execnz .LBB15_1
2002611212fcSMatt Arsenault; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
2003611212fcSMatt Arsenault; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s4
200486627149SCarl Ritson; GFX12-NEXT:    s_wait_alu 0xfffe
2005611212fcSMatt Arsenault; GFX12-NEXT:    s_setpc_b64 s[30:31]
2006611212fcSMatt Arsenault;
2007611212fcSMatt Arsenault; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
2008611212fcSMatt Arsenault; GFX940:       ; %bb.0:
2009611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20106548b635SShilei Tian; GFX940-NEXT:    v_mov_b32_e32 v2, s16
2011611212fcSMatt Arsenault; GFX940-NEXT:    buffer_wbl2 sc1
201241439d5bSMatt Arsenault; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
2013611212fcSMatt Arsenault; GFX940-NEXT:    s_waitcnt vmcnt(0)
2014611212fcSMatt Arsenault; GFX940-NEXT:    buffer_inv sc1
2015611212fcSMatt Arsenault; GFX940-NEXT:    s_setpc_b64 s[30:31]
2016611212fcSMatt Arsenault;
2017611212fcSMatt Arsenault; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
2018611212fcSMatt Arsenault; GFX11:       ; %bb.0:
2019611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2020*eeac0ffaSNikita Popov; GFX11-NEXT:    v_mov_b32_e32 v6, s16
2021*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
2022611212fcSMatt Arsenault; GFX11-NEXT:    s_mov_b32 s4, 0
2023*eeac0ffaSNikita Popov; GFX11-NEXT:    buffer_load_b64 v[2:3], v6, s[0:3], 0 offen
2024611212fcSMatt Arsenault; GFX11-NEXT:  .LBB15_1: ; %atomicrmw.start
2025611212fcSMatt Arsenault; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
2026611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
2027*eeac0ffaSNikita Popov; GFX11-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
2028611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2029611212fcSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2030*eeac0ffaSNikita Popov; GFX11-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
2031*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v10, v3 :: v_dual_mov_b32 v9, v2
2032*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v7, v0
2033*eeac0ffaSNikita Popov; GFX11-NEXT:    buffer_atomic_cmpswap_b64 v[7:10], v6, s[0:3], 0 offen glc
2034611212fcSMatt Arsenault; GFX11-NEXT:    s_waitcnt vmcnt(0)
2035611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl1_inv
2036611212fcSMatt Arsenault; GFX11-NEXT:    buffer_gl0_inv
2037*eeac0ffaSNikita Popov; GFX11-NEXT:    v_cmp_eq_u64_e32 vcc_lo, v[7:8], v[2:3]
2038*eeac0ffaSNikita Popov; GFX11-NEXT:    v_dual_mov_b32 v2, v7 :: v_dual_mov_b32 v3, v8
2039611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 s4, vcc_lo, s4
2040611212fcSMatt Arsenault; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2041611212fcSMatt Arsenault; GFX11-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s4
2042611212fcSMatt Arsenault; GFX11-NEXT:    s_cbranch_execnz .LBB15_1
2043611212fcSMatt Arsenault; GFX11-NEXT:  ; %bb.2: ; %atomicrmw.end
2044611212fcSMatt Arsenault; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s4
2045611212fcSMatt Arsenault; GFX11-NEXT:    s_setpc_b64 s[30:31]
2046611212fcSMatt Arsenault;
2047611212fcSMatt Arsenault; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
2048611212fcSMatt Arsenault; GFX10:       ; %bb.0:
2049611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20506548b635SShilei Tian; GFX10-NEXT:    v_mov_b32_e32 v2, s20
2051611212fcSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
20526548b635SShilei Tian; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[16:19], 0 offen
205341439d5bSMatt Arsenault; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
2054611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl1_inv
2055611212fcSMatt Arsenault; GFX10-NEXT:    buffer_gl0_inv
2056611212fcSMatt Arsenault; GFX10-NEXT:    s_setpc_b64 s[30:31]
2057611212fcSMatt Arsenault;
2058611212fcSMatt Arsenault; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
2059611212fcSMatt Arsenault; GFX90A:       ; %bb.0:
2060611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20616548b635SShilei Tian; GFX90A-NEXT:    v_mov_b32_e32 v2, s20
20626548b635SShilei Tian; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[16:19], 0 offen
2063611212fcSMatt Arsenault; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2064611212fcSMatt Arsenault; GFX90A-NEXT:    buffer_wbinvl1
2065611212fcSMatt Arsenault; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2066611212fcSMatt Arsenault;
2067611212fcSMatt Arsenault; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
2068611212fcSMatt Arsenault; GFX908:       ; %bb.0:
2069611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v6, s20
2071*eeac0ffaSNikita Popov; GFX908-NEXT:    buffer_load_dwordx2 v[2:3], v6, s[16:19], 0 offen
2072*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
20736548b635SShilei Tian; GFX908-NEXT:    s_mov_b64 s[4:5], 0
2074611212fcSMatt Arsenault; GFX908-NEXT:  .LBB15_1: ; %atomicrmw.start
2075611212fcSMatt Arsenault; GFX908-NEXT:    ; =>This Inner Loop Header: Depth=1
2076611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
2077*eeac0ffaSNikita Popov; GFX908-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
2078*eeac0ffaSNikita Popov; GFX908-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
2079*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v10, v3
2080*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v9, v2
2081*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v8, v1
2082*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v7, v0
2083*eeac0ffaSNikita Popov; GFX908-NEXT:    buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
2084611212fcSMatt Arsenault; GFX908-NEXT:    s_waitcnt vmcnt(0)
2085611212fcSMatt Arsenault; GFX908-NEXT:    buffer_wbinvl1
2086*eeac0ffaSNikita Popov; GFX908-NEXT:    v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3]
2087*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v2, v7
20886548b635SShilei Tian; GFX908-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2089*eeac0ffaSNikita Popov; GFX908-NEXT:    v_mov_b32_e32 v3, v8
20906548b635SShilei Tian; GFX908-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2091611212fcSMatt Arsenault; GFX908-NEXT:    s_cbranch_execnz .LBB15_1
2092611212fcSMatt Arsenault; GFX908-NEXT:  ; %bb.2: ; %atomicrmw.end
20936548b635SShilei Tian; GFX908-NEXT:    s_or_b64 exec, exec, s[4:5]
2094611212fcSMatt Arsenault; GFX908-NEXT:    s_setpc_b64 s[30:31]
2095611212fcSMatt Arsenault;
2096611212fcSMatt Arsenault; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
2097611212fcSMatt Arsenault; GFX8:       ; %bb.0:
2098611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2099*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v6, s20
2100*eeac0ffaSNikita Popov; GFX8-NEXT:    buffer_load_dwordx2 v[2:3], v6, s[16:19], 0 offen
2101*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[4:5], v[0:1], v[0:1]
21026548b635SShilei Tian; GFX8-NEXT:    s_mov_b64 s[4:5], 0
2103611212fcSMatt Arsenault; GFX8-NEXT:  .LBB15_1: ; %atomicrmw.start
2104611212fcSMatt Arsenault; GFX8-NEXT:    ; =>This Inner Loop Header: Depth=1
2105611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
2106*eeac0ffaSNikita Popov; GFX8-NEXT:    v_max_f64 v[0:1], v[2:3], v[2:3]
2107*eeac0ffaSNikita Popov; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], v[4:5]
2108*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v10, v3
2109*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v9, v2
2110*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v8, v1
2111*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v7, v0
2112*eeac0ffaSNikita Popov; GFX8-NEXT:    buffer_atomic_cmpswap_x2 v[7:10], v6, s[16:19], 0 offen glc
2113611212fcSMatt Arsenault; GFX8-NEXT:    s_waitcnt vmcnt(0)
2114611212fcSMatt Arsenault; GFX8-NEXT:    buffer_wbinvl1
2115*eeac0ffaSNikita Popov; GFX8-NEXT:    v_cmp_eq_u64_e32 vcc, v[7:8], v[2:3]
2116*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v2, v7
21176548b635SShilei Tian; GFX8-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
2118*eeac0ffaSNikita Popov; GFX8-NEXT:    v_mov_b32_e32 v3, v8
21196548b635SShilei Tian; GFX8-NEXT:    s_andn2_b64 exec, exec, s[4:5]
2120611212fcSMatt Arsenault; GFX8-NEXT:    s_cbranch_execnz .LBB15_1
2121611212fcSMatt Arsenault; GFX8-NEXT:  ; %bb.2: ; %atomicrmw.end
21226548b635SShilei Tian; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
2123611212fcSMatt Arsenault; GFX8-NEXT:    s_setpc_b64 s[30:31]
2124611212fcSMatt Arsenault;
2125611212fcSMatt Arsenault; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f64__amdgpu_no_fine_grained_memory:
2126611212fcSMatt Arsenault; GFX7:       ; %bb.0:
2127611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21286548b635SShilei Tian; GFX7-NEXT:    v_mov_b32_e32 v2, s20
21296548b635SShilei Tian; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[16:19], 0 offen
2130611212fcSMatt Arsenault; GFX7-NEXT:    s_waitcnt vmcnt(0)
2131611212fcSMatt Arsenault; GFX7-NEXT:    buffer_wbinvl1
2132611212fcSMatt Arsenault; GFX7-NEXT:    s_setpc_b64 s[30:31]
2133611212fcSMatt Arsenault  %unused = atomicrmw fmin ptr addrspace(7) %ptr, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
2134611212fcSMatt Arsenault  ret void
2135611212fcSMatt Arsenault}
2136611212fcSMatt Arsenault
; Empty metadata node; the atomicrmw instructions above reference it as the
; operand of !amdgpu.no.fine.grained.memory.
2137611212fcSMatt Arsenault!0 = !{}
; NOTE(review): no use of !1 is visible in this part of the file; presumably
; it is referenced by tests earlier in the file - confirm before removing.
21381d037087SMatt Arsenault!1 = !{i32 5, i32 6}
2139