; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-ptr-atomics.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s  -check-prefix=GFX7
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100

; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s  -check-prefix=G_GFX7
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100

; Intrinsics under test: f32 min/max atomics on a buffer addressed through a
; ptr addrspace(8) resource. Operands as declared here: the float data value,
; the buffer resource pointer, two i32 values, and a trailing i32 that must be
; an immediate (immarg).
declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float, ptr addrspace(8), i32, i32, i32 immarg)


; Kernel test: f32 buffer fmin atomic whose result is never used.
; %data and %vindex are kernel arguments; the expected output fetches them with
; scalar loads, copies them into v0/v1, and issues the atomic without the glc
; bit (the no-return form).
; Note: %vindex is passed as the third intrinsic operand, and the expected
; code addresses the buffer with it via "offen".
define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x1
; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x1
; GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; Shader (amdgpu_ps) test: f32 buffer fmin atomic whose result is used.
; The returned value is stored through an undef addrspace(1) pointer, so the
; expected output uses the glc (returning) form of the atomic and waits on
; vmcnt before the store. Arguments are already in s[0:3] / v0 / v1, so no
; argument loads appear in the checks.
define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_min_rtn_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  store float %ret, ptr addrspace(1) undef
  ret void
}

; Kernel test: returning f32 buffer fmin atomic with a non-zero fourth operand
; (4) and a non-zero trailing immarg (2). In the expected output these show up
; as the "4" scalar offset and the "slc" modifier on the atomic, alongside glc
; because the result is used.
; The result is stored to %out, an addrspace(3) pointer, which the expected
; code writes with a DS store (ds_write_b32 / ds_store_b32).
define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f32_off4_slc(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex, ptr addrspace(3) %out) {
; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; SI-NEXT:    s_load_dword s0, s[4:5], 0xf
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v1, s0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ds_write_b32 v1, v0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; GFX7-NEXT:    s_mov_b32 m0, -1
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; GFX7-NEXT:    v_mov_b32_e32 v1, s6
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    ds_write_b32 v1, v0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[8:11], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v1, s14
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ds_write_b32 v1, v0
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v1, s6
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    ds_write_b32 v1, v0
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 4 offen glc slc
; GFX1100-NEXT:    v_mov_b32_e32 v1, s6
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    ds_store_b32 v1, v0
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_mov_b32 m0, -1
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_SI-NEXT:    s_load_dword s0, s[4:5], 0xf
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v1, s0
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    ds_write_b32 v1, v0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_mov_b32 m0, -1
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    s_load_dword s0, s[4:5], 0xf
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    ds_write_b32 v1, v0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX10-NEXT:    s_waitcnt_depctr 0xffe3
; G_GFX10-NEXT:    s_load_dword s0, s[4:5], 0x3c
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    ds_write_b32 v1, v0
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmin v0, v1, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    s_load_dword s0, s[4:5], 0x3c
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    ds_write_b32 v1, v0
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_min_rtn_f32_off4_slc:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_min_f32 v0, v1, s[0:3], 4 offen glc slc
; G_GFX1100-NEXT:    s_load_b32 s0, s[4:5], 0x3c
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_mov_b32_e32 v1, s0
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    ds_store_b32 v1, v0
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
  store float %ret, ptr addrspace(3) %out, align 8
  ret void
}

; Kernel test: f32 buffer fmax atomic whose result is never used.
; %data and %vindex are kernel arguments; the expected output fetches them with
; scalar loads, copies them into v0/v1, and issues the atomic without the glc
; bit (the no-return form).
; Note: %vindex is passed as the third intrinsic operand, and the expected
; code addresses the buffer with it via "offen".
define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s6
; SI-NEXT:    v_mov_b32_e32 v1, s7
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s6
; GFX7-NEXT:    v_mov_b32_e32 v1, s7
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s6
; GFX10-NEXT:    v_mov_b32_e32 v1, s7
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_clause 0x1
; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_clause 0x1
; GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s6
; G_SI-NEXT:    v_mov_b32_e32 v1, s7
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_clause 0x1
; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_clause 0x1
; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_max_noret_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_clause 0x1
; G_GFX1100-NEXT:    s_load_b64 s[6:7], s[4:5], 0x34
; G_GFX1100-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s6 :: v_dual_mov_b32 v1, s7
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

; Shader (amdgpu_ps) test: f32 buffer fmax atomic whose result is used.
; The returned value is stored through an undef addrspace(1) pointer, so the
; expected output uses the glc (returning) form of the atomic and waits on
; vmcnt before the store. Arguments are already in s[0:3] / v0 / v1, so no
; argument loads appear in the checks.
define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v[0:1], v0, off
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v[0:1], v0, off
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_max_rtn_f32:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 0 offen glc
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v[0:1], v0, off
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
  store float %ret, ptr addrspace(1) undef
  ret void
}

; Kernel test: returning f32 buffer fmax atomic with a non-zero fourth operand
; (4) and a non-zero trailing immarg (2). In the expected output these show up
; as the "4" scalar offset and the "slc" modifier on the atomic, alongside glc
; because the result is used.
; The result is stored to %out, an addrspace(1) pointer passed as a kernel
; argument; the expected code writes it with a buffer or global store.
define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f32_off4_slc(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex, ptr addrspace(1) %out) {
; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    v_mov_b32_e32 v1, s5
; SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s0, s6
; SI-NEXT:    s_mov_b32 s1, s7
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; GFX7:       ; %bb.0: ; %main_body
; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7-NEXT:    v_mov_b32_e32 v0, s4
; GFX7-NEXT:    v_mov_b32_e32 v1, s5
; GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; GFX7-NEXT:    s_mov_b32 s3, 0xf000
; GFX7-NEXT:    s_mov_b32 s2, -1
; GFX7-NEXT:    s_mov_b32 s0, s6
; GFX7-NEXT:    s_mov_b32 s1, s7
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7-NEXT:    s_endpgm
;
; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, s12
; GFX10-NEXT:    v_mov_b32_e32 v1, s13
; GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[8:11], 4 offen glc slc
; GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v1, v0, s[14:15]
; GFX10-NEXT:    s_endpgm
;
; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; GFX1030:       ; %bb.0: ; %main_body
; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; GFX1030-NEXT:    v_mov_b32_e32 v1, 0
; GFX1030-NEXT:    s_waitcnt vmcnt(0)
; GFX1030-NEXT:    global_store_dword v1, v0, s[6:7]
; GFX1030-NEXT:    s_endpgm
;
; GFX1100-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; GFX1100:       ; %bb.0: ; %main_body
; GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 4 offen glc slc
; GFX1100-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-NEXT:    global_store_b32 v1, v0, s[6:7]
; GFX1100-NEXT:    s_endpgm
;
; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_SI:       ; %bb.0: ; %main_body
; G_SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
; G_SI-NEXT:    v_mov_b32_e32 v0, s4
; G_SI-NEXT:    v_mov_b32_e32 v1, s5
; G_SI-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_SI-NEXT:    s_mov_b32 s2, -1
; G_SI-NEXT:    s_mov_b32 s3, 0xf000
; G_SI-NEXT:    s_mov_b64 s[0:1], s[6:7]
; G_SI-NEXT:    s_waitcnt vmcnt(0)
; G_SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_SI-NEXT:    s_endpgm
;
; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX7:       ; %bb.0: ; %main_body
; G_GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX7-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_GFX7-NEXT:    s_mov_b32 s2, -1
; G_GFX7-NEXT:    s_mov_b32 s3, 0xf000
; G_GFX7-NEXT:    s_mov_b64 s[0:1], s[6:7]
; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
; G_GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; G_GFX7-NEXT:    s_endpgm
;
; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX10:       ; %bb.0: ; %main_body
; G_GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT:    v_mov_b32_e32 v0, s12
; G_GFX10-NEXT:    v_mov_b32_e32 v1, s13
; G_GFX10-NEXT:    buffer_atomic_fmax v0, v1, s[8:11], 4 offen glc slc
; G_GFX10-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
; G_GFX10-NEXT:    global_store_dword v1, v0, s[14:15]
; G_GFX10-NEXT:    s_endpgm
;
; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX1030:       ; %bb.0: ; %main_body
; G_GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
; G_GFX1030-NEXT:    buffer_atomic_fmax v0, v1, s[0:3], 4 offen glc slc
; G_GFX1030-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1030-NEXT:    global_store_dword v1, v0, s[6:7]
; G_GFX1030-NEXT:    s_endpgm
;
; G_GFX1100-LABEL: raw_ptr_buffer_atomic_max_rtn_f32_off4_slc:
; G_GFX1100:       ; %bb.0: ; %main_body
; G_GFX1100-NEXT:    s_load_b256 s[0:7], s[4:5], 0x24
; G_GFX1100-NEXT:    s_waitcnt lgkmcnt(0)
; G_GFX1100-NEXT:    v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; G_GFX1100-NEXT:    buffer_atomic_max_f32 v0, v1, s[0:3], 4 offen glc slc
; G_GFX1100-NEXT:    v_mov_b32_e32 v1, 0
; G_GFX1100-NEXT:    s_waitcnt vmcnt(0)
; G_GFX1100-NEXT:    global_store_b32 v1, v0, s[6:7]
; G_GFX1100-NEXT:    s_endpgm
main_body:
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
  store float %ret, ptr addrspace(1) %out, align 8
  ret void
}
