xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
3; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s  -check-prefix=GFX7
4; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
5; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
6; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100
7; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12
8
9; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
10; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s  -check-prefix=G_GFX7
11; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
12; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
13; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100
14; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12
15
; Intrinsics under test. Both return float and take
; (float, <4 x i32>, i32, i32, i32 immarg); the final operand must be an
; immediate (immarg).
16declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg)
17declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg)
18
19
; Kernel that issues a raw buffer fmin atomic whose result is never used.
; The call passes 0 for both trailing i32 operands. The expected assembly for
; every target below is a single buffer atomic min with the `offen` modifier
; and no return modifier (no `glc`, no TH_ATOMIC_RETURN).
; NOTE(review): the argument is named %vindex, but the checks show it consumed
; as the `offen` VGPR operand - confirm the name against the intrinsic
; definition.
20define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
21; SI-LABEL: raw_buffer_atomic_min_noret_f32:
22; SI:       ; %bb.0: ; %main_body
23; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
24; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
25; SI-NEXT:    s_waitcnt lgkmcnt(0)
26; SI-NEXT:    v_mov_b32_e32 v0, s6
27; SI-NEXT:    v_mov_b32_e32 v1, s7
28; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
29; SI-NEXT:    s_endpgm
30;
31; GFX7-LABEL: raw_buffer_atomic_min_noret_f32:
32; GFX7:       ; %bb.0: ; %main_body
33; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
34; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
35; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
36; GFX7-NEXT:    v_mov_b32_e32 v0, s6
37; GFX7-NEXT:    v_mov_b32_e32 v1, s7
38; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
39; GFX7-NEXT:    s_endpgm
40;
41; GFX10-LABEL: raw_buffer_atomic_min_noret_f32:
42; GFX10:       ; %bb.0: ; %main_body
43; GFX10-NEXT:    s_clause 0x1
44; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
45; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
46; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
47; GFX10-NEXT:    v_mov_b32_e32 v0, s6
48; GFX10-NEXT:    v_mov_b32_e32 v1, s7
49; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
50; GFX10-NEXT:    s_endpgm
51;
52; GFX1030-LABEL: raw_buffer_atomic_min_noret_f32:
53; GFX1030:       ; %bb.0: ; %main_body
54; GFX1030-NEXT:    s_clause 0x1
55; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
56; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
57; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
58; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
59; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
60; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
61; GFX1030-NEXT:    s_endpgm
62;
63; GFX1100-LABEL: raw_buffer_atomic_min_noret_f32:
64; GFX1100:       ; %bb.0: ; %main_body
65; GFX1100-NEXT:    s_clause 0x1
66; GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
67; GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
68; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
69; GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
70; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
71; GFX1100-NEXT:    s_endpgm
72;
73; GFX12-LABEL: raw_buffer_atomic_min_noret_f32:
74; GFX12:       ; %bb.0: ; %main_body
75; GFX12-NEXT:    s_clause 0x1
76; GFX12-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
77; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
78; GFX12-NEXT:    s_wait_kmcnt 0x0
79; GFX12-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
80; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen
81; GFX12-NEXT:    s_endpgm
82;
83; G_SI-LABEL: raw_buffer_atomic_min_noret_f32:
84; G_SI:       ; %bb.0: ; %main_body
85; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
86; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
87; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
88; G_SI-NEXT:    v_mov_b32_e32 v0, s6
89; G_SI-NEXT:    v_mov_b32_e32 v1, s7
90; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
91; G_SI-NEXT:    s_endpgm
92;
93; G_GFX7-LABEL: raw_buffer_atomic_min_noret_f32:
94; G_GFX7:       ; %bb.0: ; %main_body
95; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
96; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
97; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
98; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
99; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
100; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
101; G_GFX7-NEXT:    s_endpgm
102;
103; G_GFX10-LABEL: raw_buffer_atomic_min_noret_f32:
104; G_GFX10:       ; %bb.0: ; %main_body
105; G_GFX10-NEXT:    s_clause 0x1
106; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
107; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
108; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
109; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
110; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
111; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
112; G_GFX10-NEXT:    s_endpgm
113;
114; G_GFX1030-LABEL: raw_buffer_atomic_min_noret_f32:
115; G_GFX1030:       ; %bb.0: ; %main_body
116; G_GFX1030-NEXT:    s_clause 0x1
117; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
118; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
119; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
120; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
121; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
122; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
123; G_GFX1030-NEXT:    s_endpgm
124;
125; G_GFX1100-LABEL: raw_buffer_atomic_min_noret_f32:
126; G_GFX1100:       ; %bb.0: ; %main_body
127; G_GFX1100-NEXT:    s_clause 0x1
128; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
129; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
130; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
131; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
132; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
133; G_GFX1100-NEXT:    s_endpgm
134main_body:
; %ret is intentionally left unused so the no-return form is selected.
135  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
136  ret void
137}
138
; Pixel-shader entry point that issues a raw buffer fmin atomic and consumes
; the returned value by storing it to global memory (addrspace 1).
; The call passes 0 for both trailing i32 operands. Because the result is
; used, the expected atomic carries a return modifier (`glc`, or
; th:TH_ATOMIC_RETURN in the GFX12 checks) and is followed by a wait on the
; vector-memory/load counter before the store.
; NOTE(review): the store address is `undef`; newer LLVM tests tend to prefer
; `poison` - confirm the generated checks are unchanged before switching.
139define amdgpu_ps void @raw_buffer_atomic_min_rtn_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
140; SI-LABEL: raw_buffer_atomic_min_rtn_f32:
141; SI:       ; %bb.0: ; %main_body
142; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
143; SI-NEXT:    s_mov_b32 s3, 0xf000
144; SI-NEXT:    s_mov_b32 s2, -1
145; SI-NEXT:    s_waitcnt vmcnt(0)
146; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
147; SI-NEXT:    s_endpgm
148;
149; GFX7-LABEL: raw_buffer_atomic_min_rtn_f32:
150; GFX7:       ; %bb.0: ; %main_body
151; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
152; GFX7-NEXT:    s_mov_b32 s3, 0xf000
153; GFX7-NEXT:    s_mov_b32 s2, -1
154; GFX7-NEXT:    s_waitcnt vmcnt(0)
155; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
156; GFX7-NEXT:    s_endpgm
157;
158; GFX10-LABEL: raw_buffer_atomic_min_rtn_f32:
159; GFX10:       ; %bb.0: ; %main_body
160; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
161; GFX10-NEXT:    s_waitcnt vmcnt(0)
162; GFX10-NEXT:    global_store_dword v[0:1], v0, off
163; GFX10-NEXT:    s_endpgm
164;
165; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32:
166; GFX1030:       ; %bb.0: ; %main_body
167; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
168; GFX1030-NEXT:    s_waitcnt vmcnt(0)
169; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
170; GFX1030-NEXT:    s_endpgm
171;
172; GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32:
173; GFX1100:       ; %bb.0: ; %main_body
174; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
175; GFX1100-NEXT:    s_waitcnt vmcnt(0)
176; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
177; GFX1100-NEXT:    s_endpgm
178;
179; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32:
180; GFX12:       ; %bb.0: ; %main_body
181; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
182; GFX12-NEXT:    s_wait_loadcnt 0x0
183; GFX12-NEXT:    global_store_b32 v[0:1], v0, off
184; GFX12-NEXT:    s_endpgm
185;
186; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32:
187; G_SI:       ; %bb.0: ; %main_body
188; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
189; G_SI-NEXT:    s_mov_b32 s2, -1
190; G_SI-NEXT:    s_mov_b32 s3, 0xf000
191; G_SI-NEXT:    s_waitcnt vmcnt(0)
192; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
193; G_SI-NEXT:    s_endpgm
194;
195; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32:
196; G_GFX7:       ; %bb.0: ; %main_body
197; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
198; G_GFX7-NEXT:    s_mov_b32 s2, -1
199; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
200; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
201; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
202; G_GFX7-NEXT:    s_endpgm
203;
204; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32:
205; G_GFX10:       ; %bb.0: ; %main_body
206; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
207; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
208; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
209; G_GFX10-NEXT:    s_endpgm
210;
211; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32:
212; G_GFX1030:       ; %bb.0: ; %main_body
213; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
214; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
215; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
216; G_GFX1030-NEXT:    s_endpgm
217;
218; G_GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32:
219; G_GFX1100:       ; %bb.0: ; %main_body
220; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
221; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
222; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
223; G_GFX1100-NEXT:    s_endpgm
224main_body:
225  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
; Storing %ret keeps the atomic's result live, forcing the returning form.
226  store float %ret, ptr addrspace(1) undef
227  ret void
228}
229
; Kernel that issues a returning raw buffer fmin atomic with non-zero trailing
; operands (i32 4 and i32 2) and stores the result to LDS (addrspace 3) via
; %out. In the checks below the 4 shows up as the scalar offset (`4 offen`
; before GFX12, `s4` loaded with 4 in the GFX12 checks) and the 2 shows up as
; the `slc` modifier (th:TH_ATOMIC_NT_RETURN in the GFX12 checks). The result
; is written with ds_write_b32 / ds_store_b32 after a vector-memory wait.
230define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex, ptr addrspace(3) %out) {
231; SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
232; SI:       ; %bb.0: ; %main_body
233; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
234; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
235; SI-NEXT:    s_mov_b32 m0, -1
236; SI-NEXT:    s_waitcnt lgkmcnt(0)
237; SI-NEXT:    v_mov_b32_e32 v0, s6
238; SI-NEXT:    v_mov_b32_e32 v1, s7
239; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
240; SI-NEXT:    s_load_dword s0, s[4:5], 0xf
241; SI-NEXT:    s_waitcnt lgkmcnt(0)
242; SI-NEXT:    v_mov_b32_e32 v1, s0
243; SI-NEXT:    s_waitcnt vmcnt(0)
244; SI-NEXT:    ds_write_b32 v1, v0
245; SI-NEXT:    s_endpgm
246;
247; GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
248; GFX7:       ; %bb.0: ; %main_body
249; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
250; GFX7-NEXT:    s_mov_b32 m0, -1
251; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
252; GFX7-NEXT:    v_mov_b32_e32 v0, s4
253; GFX7-NEXT:    v_mov_b32_e32 v1, s5
254; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
255; GFX7-NEXT:    v_mov_b32_e32 v1, s6
256; GFX7-NEXT:    s_waitcnt vmcnt(0)
257; GFX7-NEXT:    ds_write_b32 v1, v0
258; GFX7-NEXT:    s_endpgm
259;
260; GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
261; GFX10:       ; %bb.0: ; %main_body
262; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
263; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
264; GFX10-NEXT:    v_mov_b32_e32 v0, s12
265; GFX10-NEXT:    v_mov_b32_e32 v1, s13
266; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[8:11], 4 offen glc slc
267; GFX10-NEXT:    v_mov_b32_e32 v1, s14
268; GFX10-NEXT:    s_waitcnt vmcnt(0)
269; GFX10-NEXT:    ds_write_b32 v1, v0
270; GFX10-NEXT:    s_endpgm
271;
272; GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
273; GFX1030:       ; %bb.0: ; %main_body
274; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
275; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
276; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
277; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
278; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
279; GFX1030-NEXT:    v_mov_b32_e32 v1, s6
280; GFX1030-NEXT:    s_waitcnt vmcnt(0)
281; GFX1030-NEXT:    ds_write_b32 v1, v0
282; GFX1030-NEXT:    s_endpgm
283;
284; GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
285; GFX1100:       ; %bb.0: ; %main_body
286; GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
287; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
288; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
289; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 4 offen glc slc
290; GFX1100-NEXT:    v_mov_b32_e32 v1, s6
291; GFX1100-NEXT:    s_waitcnt vmcnt(0)
292; GFX1100-NEXT:    ds_store_b32 v1, v0
293; GFX1100-NEXT:    s_endpgm
294;
295; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
296; GFX12:       ; %bb.0: ; %main_body
297; GFX12-NEXT:    s_clause 0x1
298; GFX12-NEXT:    s_load_b96 s[8:10], s[4:5], 0x34
299; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
300; GFX12-NEXT:    s_mov_b32 s4, 4
301; GFX12-NEXT:    s_wait_kmcnt 0x0
302; GFX12-NEXT:    v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9
303; GFX12-NEXT:    buffer_atomic_min_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN
304; GFX12-NEXT:    v_mov_b32_e32 v1, s10
305; GFX12-NEXT:    s_wait_loadcnt 0x0
306; GFX12-NEXT:    ds_store_b32 v1, v0
307; GFX12-NEXT:    s_endpgm
308;
309; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
310; G_SI:       ; %bb.0: ; %main_body
311; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
312; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
313; G_SI-NEXT:    s_mov_b32 m0, -1
314; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
315; G_SI-NEXT:    v_mov_b32_e32 v0, s6
316; G_SI-NEXT:    v_mov_b32_e32 v1, s7
317; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
318; G_SI-NEXT:    s_load_dword s0, s[4:5], 0xf
319; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
320; G_SI-NEXT:    v_mov_b32_e32 v1, s0
321; G_SI-NEXT:    s_waitcnt vmcnt(0)
322; G_SI-NEXT:    ds_write_b32 v1, v0
323; G_SI-NEXT:    s_endpgm
324;
325; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
326; G_GFX7:       ; %bb.0: ; %main_body
327; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
328; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
329; G_GFX7-NEXT:    s_mov_b32 m0, -1
330; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
331; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
332; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
333; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
334; G_GFX7-NEXT:    s_load_dword s0, s[4:5], 0xf
335; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
336; G_GFX7-NEXT:    v_mov_b32_e32 v1, s0
337; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
338; G_GFX7-NEXT:    ds_write_b32 v1, v0
339; G_GFX7-NEXT:    s_endpgm
340;
341; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
342; G_GFX10:       ; %bb.0: ; %main_body
343; G_GFX10-NEXT:    s_clause 0x1
344; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
345; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
346; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
347; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
348; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
349; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
350; G_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
351; G_GFX10-NEXT:    s_load_dword s0, s[4:5], 0x3c
352; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
353; G_GFX10-NEXT:    v_mov_b32_e32 v1, s0
354; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
355; G_GFX10-NEXT:    ds_write_b32 v1, v0
356; G_GFX10-NEXT:    s_endpgm
357;
358; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
359; G_GFX1030:       ; %bb.0: ; %main_body
360; G_GFX1030-NEXT:    s_clause 0x1
361; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
362; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
363; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
364; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
365; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
366; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
367; G_GFX1030-NEXT:    s_load_dword s0, s[4:5], 0x3c
368; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
369; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s0
370; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
371; G_GFX1030-NEXT:    ds_write_b32 v1, v0
372; G_GFX1030-NEXT:    s_endpgm
373;
374; G_GFX1100-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
375; G_GFX1100:       ; %bb.0: ; %main_body
376; G_GFX1100-NEXT:    s_clause 0x1
377; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
378; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
379; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
380; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
381; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 4 offen glc slc
382; G_GFX1100-NEXT:    s_load_b32 s0, s[4:5], 0x3c
383; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
384; G_GFX1100-NEXT:    v_mov_b32_e32 v1, s0
385; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
386; G_GFX1100-NEXT:    ds_store_b32 v1, v0
387; G_GFX1100-NEXT:    s_endpgm
388main_body:
; Trailing operands 4 and 2 select the non-zero offset and the slc variant
; seen in the expected assembly above.
389  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
390  store float %ret, ptr addrspace(3) %out, align 8
391  ret void
392}
393
; Kernel that issues a raw buffer fmax atomic whose result is never used.
; The call passes 0 for both trailing i32 operands. The expected assembly for
; every target below is a single buffer atomic max with the `offen` modifier
; and no return modifier (no `glc`, no TH_ATOMIC_RETURN).
394define amdgpu_kernel void @raw_buffer_atomic_max_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
395; SI-LABEL: raw_buffer_atomic_max_noret_f32:
396; SI:       ; %bb.0: ; %main_body
397; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
398; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
399; SI-NEXT:    s_waitcnt lgkmcnt(0)
400; SI-NEXT:    v_mov_b32_e32 v0, s6
401; SI-NEXT:    v_mov_b32_e32 v1, s7
402; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
403; SI-NEXT:    s_endpgm
404;
405; GFX7-LABEL: raw_buffer_atomic_max_noret_f32:
406; GFX7:       ; %bb.0: ; %main_body
407; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
408; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
409; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
410; GFX7-NEXT:    v_mov_b32_e32 v0, s6
411; GFX7-NEXT:    v_mov_b32_e32 v1, s7
412; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
413; GFX7-NEXT:    s_endpgm
414;
415; GFX10-LABEL: raw_buffer_atomic_max_noret_f32:
416; GFX10:       ; %bb.0: ; %main_body
417; GFX10-NEXT:    s_clause 0x1
418; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
419; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
420; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
421; GFX10-NEXT:    v_mov_b32_e32 v0, s6
422; GFX10-NEXT:    v_mov_b32_e32 v1, s7
423; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
424; GFX10-NEXT:    s_endpgm
425;
426; GFX1030-LABEL: raw_buffer_atomic_max_noret_f32:
427; GFX1030:       ; %bb.0: ; %main_body
428; GFX1030-NEXT:    s_clause 0x1
429; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
430; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
431; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
432; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
433; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
434; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
435; GFX1030-NEXT:    s_endpgm
436;
437; GFX1100-LABEL: raw_buffer_atomic_max_noret_f32:
438; GFX1100:       ; %bb.0: ; %main_body
439; GFX1100-NEXT:    s_clause 0x1
440; GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
441; GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
442; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
443; GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
444; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
445; GFX1100-NEXT:    s_endpgm
446;
447; GFX12-LABEL: raw_buffer_atomic_max_noret_f32:
448; GFX12:       ; %bb.0: ; %main_body
449; GFX12-NEXT:    s_clause 0x1
450; GFX12-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
451; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
452; GFX12-NEXT:    s_wait_kmcnt 0x0
453; GFX12-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
454; GFX12-NEXT:    buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
455; GFX12-NEXT:    s_endpgm
456;
457; G_SI-LABEL: raw_buffer_atomic_max_noret_f32:
458; G_SI:       ; %bb.0: ; %main_body
459; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
460; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
461; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
462; G_SI-NEXT:    v_mov_b32_e32 v0, s6
463; G_SI-NEXT:    v_mov_b32_e32 v1, s7
464; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
465; G_SI-NEXT:    s_endpgm
466;
467; G_GFX7-LABEL: raw_buffer_atomic_max_noret_f32:
468; G_GFX7:       ; %bb.0: ; %main_body
469; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
470; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
471; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
472; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
473; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
474; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
475; G_GFX7-NEXT:    s_endpgm
476;
477; G_GFX10-LABEL: raw_buffer_atomic_max_noret_f32:
478; G_GFX10:       ; %bb.0: ; %main_body
479; G_GFX10-NEXT:    s_clause 0x1
480; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
481; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
482; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
483; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
484; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
485; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
486; G_GFX10-NEXT:    s_endpgm
487;
488; G_GFX1030-LABEL: raw_buffer_atomic_max_noret_f32:
489; G_GFX1030:       ; %bb.0: ; %main_body
490; G_GFX1030-NEXT:    s_clause 0x1
491; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
492; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
493; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
494; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
495; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
496; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
497; G_GFX1030-NEXT:    s_endpgm
498;
499; G_GFX1100-LABEL: raw_buffer_atomic_max_noret_f32:
500; G_GFX1100:       ; %bb.0: ; %main_body
501; G_GFX1100-NEXT:    s_clause 0x1
502; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
503; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
504; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
505; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
506; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
507; G_GFX1100-NEXT:    s_endpgm
508main_body:
; %ret is intentionally left unused so the no-return form is selected.
509  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
510  ret void
511}
512
; Pixel-shader entry point that issues a raw buffer fmax atomic and consumes
; the returned value by storing it to global memory (addrspace 1).
; The call passes 0 for both trailing i32 operands. Because the result is
; used, the expected atomic carries a return modifier (`glc`, or
; th:TH_ATOMIC_RETURN in the GFX12 checks) and is followed by a wait on the
; vector-memory/load counter before the store.
; NOTE(review): the store address is `undef`; newer LLVM tests tend to prefer
; `poison` - confirm the generated checks are unchanged before switching.
513define amdgpu_ps void @raw_buffer_atomic_max_rtn_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
514; SI-LABEL: raw_buffer_atomic_max_rtn_f32:
515; SI:       ; %bb.0: ; %main_body
516; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
517; SI-NEXT:    s_mov_b32 s3, 0xf000
518; SI-NEXT:    s_mov_b32 s2, -1
519; SI-NEXT:    s_waitcnt vmcnt(0)
520; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
521; SI-NEXT:    s_endpgm
522;
523; GFX7-LABEL: raw_buffer_atomic_max_rtn_f32:
524; GFX7:       ; %bb.0: ; %main_body
525; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
526; GFX7-NEXT:    s_mov_b32 s3, 0xf000
527; GFX7-NEXT:    s_mov_b32 s2, -1
528; GFX7-NEXT:    s_waitcnt vmcnt(0)
529; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
530; GFX7-NEXT:    s_endpgm
531;
532; GFX10-LABEL: raw_buffer_atomic_max_rtn_f32:
533; GFX10:       ; %bb.0: ; %main_body
534; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
535; GFX10-NEXT:    s_waitcnt vmcnt(0)
536; GFX10-NEXT:    global_store_dword v[0:1], v0, off
537; GFX10-NEXT:    s_endpgm
538;
539; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32:
540; GFX1030:       ; %bb.0: ; %main_body
541; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
542; GFX1030-NEXT:    s_waitcnt vmcnt(0)
543; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
544; GFX1030-NEXT:    s_endpgm
545;
546; GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32:
547; GFX1100:       ; %bb.0: ; %main_body
548; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
549; GFX1100-NEXT:    s_waitcnt vmcnt(0)
550; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
551; GFX1100-NEXT:    s_endpgm
552;
553; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32:
554; GFX12:       ; %bb.0: ; %main_body
555; GFX12-NEXT:    buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
556; GFX12-NEXT:    s_wait_loadcnt 0x0
557; GFX12-NEXT:    global_store_b32 v[0:1], v0, off
558; GFX12-NEXT:    s_endpgm
559;
560; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32:
561; G_SI:       ; %bb.0: ; %main_body
562; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
563; G_SI-NEXT:    s_mov_b32 s2, -1
564; G_SI-NEXT:    s_mov_b32 s3, 0xf000
565; G_SI-NEXT:    s_waitcnt vmcnt(0)
566; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
567; G_SI-NEXT:    s_endpgm
568;
569; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32:
570; G_GFX7:       ; %bb.0: ; %main_body
571; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
572; G_GFX7-NEXT:    s_mov_b32 s2, -1
573; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
574; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
575; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
576; G_GFX7-NEXT:    s_endpgm
577;
578; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f32:
579; G_GFX10:       ; %bb.0: ; %main_body
580; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
581; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
582; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
583; G_GFX10-NEXT:    s_endpgm
584;
585; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32:
586; G_GFX1030:       ; %bb.0: ; %main_body
587; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
588; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
589; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
590; G_GFX1030-NEXT:    s_endpgm
591;
592; G_GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32:
593; G_GFX1100:       ; %bb.0: ; %main_body
594; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
595; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
596; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
597; G_GFX1100-NEXT:    s_endpgm
598main_body:
599  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
; Storing %ret keeps the atomic's result live, forcing the returning form.
600  store float %ret, ptr addrspace(1) undef
601  ret void
602}
603
; Kernel that issues a returning raw buffer fmax atomic with non-zero trailing
; operands (i32 4 and i32 2) and stores the result to global memory
; (addrspace 1) via %out. In the checks below the 4 shows up as the scalar
; offset (`4 offen` before GFX12, `s4` loaded with 4 in the GFX12 checks) and
; the 2 shows up as the `slc` modifier (th:TH_ATOMIC_NT_RETURN in the GFX12
; checks). The result is written with a buffer/global store after a
; vector-memory wait.
604define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inreg %rsrc, float %data, i32 %vindex, ptr addrspace(1) %out) {
605; SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
606; SI:       ; %bb.0: ; %main_body
607; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
608; SI-NEXT:    s_waitcnt lgkmcnt(0)
609; SI-NEXT:    v_mov_b32_e32 v0, s4
610; SI-NEXT:    v_mov_b32_e32 v1, s5
611; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
612; SI-NEXT:    s_mov_b32 s3, 0xf000
613; SI-NEXT:    s_mov_b32 s2, -1
614; SI-NEXT:    s_mov_b32 s0, s6
615; SI-NEXT:    s_mov_b32 s1, s7
616; SI-NEXT:    s_waitcnt vmcnt(0)
617; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
618; SI-NEXT:    s_endpgm
619;
620; GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
621; GFX7:       ; %bb.0: ; %main_body
622; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
623; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
624; GFX7-NEXT:    v_mov_b32_e32 v0, s4
625; GFX7-NEXT:    v_mov_b32_e32 v1, s5
626; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
627; GFX7-NEXT:    s_mov_b32 s3, 0xf000
628; GFX7-NEXT:    s_mov_b32 s2, -1
629; GFX7-NEXT:    s_mov_b32 s0, s6
630; GFX7-NEXT:    s_mov_b32 s1, s7
631; GFX7-NEXT:    s_waitcnt vmcnt(0)
632; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
633; GFX7-NEXT:    s_endpgm
634;
635; GFX10-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
636; GFX10:       ; %bb.0: ; %main_body
637; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
638; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
639; GFX10-NEXT:    v_mov_b32_e32 v0, s12
640; GFX10-NEXT:    v_mov_b32_e32 v1, s13
641; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[8:11], 4 offen glc slc
642; GFX10-NEXT:    v_mov_b32_e32 v1, 0
643; GFX10-NEXT:    s_waitcnt vmcnt(0)
644; GFX10-NEXT:    global_store_dword v1, v0, s[14:15]
645; GFX10-NEXT:    s_endpgm
646;
647; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
648; GFX1030:       ; %bb.0: ; %main_body
649; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
650; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
651; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
652; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
653; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
654; GFX1030-NEXT:    v_mov_b32_e32 v1, 0
655; GFX1030-NEXT:    s_waitcnt vmcnt(0)
656; GFX1030-NEXT:    global_store_dword v1, v0, s[6:7]
657; GFX1030-NEXT:    s_endpgm
658;
659; GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
660; GFX1100:       ; %bb.0: ; %main_body
661; GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
662; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
663; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
664; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 4 offen glc slc
665; GFX1100-NEXT:    v_mov_b32_e32 v1, 0
666; GFX1100-NEXT:    s_waitcnt vmcnt(0)
667; GFX1100-NEXT:    global_store_b32 v1, v0, s[6:7]
668; GFX1100-NEXT:    s_endpgm
669;
670; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
671; GFX12:       ; %bb.0: ; %main_body
672; GFX12-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
673; GFX12-NEXT:    s_wait_kmcnt 0x0
674; GFX12-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
675; GFX12-NEXT:    s_mov_b32 s4, 4
676; GFX12-NEXT:    buffer_atomic_max_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN
677; GFX12-NEXT:    v_mov_b32_e32 v1, 0
678; GFX12-NEXT:    s_wait_loadcnt 0x0
679; GFX12-NEXT:    global_store_b32 v1, v0, s[6:7]
680; GFX12-NEXT:    s_endpgm
681;
682; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
683; G_SI:       ; %bb.0: ; %main_body
684; G_SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
685; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
686; G_SI-NEXT:    v_mov_b32_e32 v0, s4
687; G_SI-NEXT:    v_mov_b32_e32 v1, s5
688; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
689; G_SI-NEXT:    s_mov_b32 s2, -1
690; G_SI-NEXT:    s_mov_b32 s3, 0xf000
691; G_SI-NEXT:    s_mov_b64 s[0:1], s[6:7]
692; G_SI-NEXT:    s_waitcnt vmcnt(0)
693; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
694; G_SI-NEXT:    s_endpgm
695;
696; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
697; G_GFX7:       ; %bb.0: ; %main_body
698; G_GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
699; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
700; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
701; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
702; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
703; G_GFX7-NEXT:    s_mov_b32 s2, -1
704; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
705; G_GFX7-NEXT:    s_mov_b64 s[0:1], s[6:7]
706; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
707; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
708; G_GFX7-NEXT:    s_endpgm
709;
710; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
711; G_GFX10:       ; %bb.0: ; %main_body
712; G_GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
713; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
714; G_GFX10-NEXT:    v_mov_b32_e32 v0, s12
715; G_GFX10-NEXT:    v_mov_b32_e32 v1, s13
716; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[8:11], 4 offen glc slc
717; G_GFX10-NEXT:    v_mov_b32_e32 v1, 0
718; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
719; G_GFX10-NEXT:    global_store_dword v1, v0, s[14:15]
720; G_GFX10-NEXT:    s_endpgm
721;
722; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
723; G_GFX1030:       ; %bb.0: ; %main_body
724; G_GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
725; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
726; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
727; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
728; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
729; G_GFX1030-NEXT:    v_mov_b32_e32 v1, 0
730; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
731; G_GFX1030-NEXT:    global_store_dword v1, v0, s[6:7]
732; G_GFX1030-NEXT:    s_endpgm
733;
734; G_GFX1100-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
735; G_GFX1100:       ; %bb.0: ; %main_body
736; G_GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
737; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
738; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
739; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 4 offen glc slc
740; G_GFX1100-NEXT:    v_mov_b32_e32 v1, 0
741; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
742; G_GFX1100-NEXT:    global_store_b32 v1, v0, s[6:7]
743; G_GFX1100-NEXT:    s_endpgm
744main_body:
; Trailing operands 4 and 2 select the non-zero offset and the slc variant
; seen in the expected assembly above.
745  %ret = call float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
746  store float %ret, ptr addrspace(1) %out, align 8
747  ret void
748}
749