xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-ptr-atomics.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=SI
3; RUN: llc < %s -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s  -check-prefix=GFX7
4; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
5; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
6
7; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
8; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s  -check-prefix=G_GFX7
9; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
10; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
11
; f64 raw-pointer buffer atomic intrinsics exercised by this file.
; As called below, the operands are: the double data value, the
; ptr addrspace(8) buffer resource, an i32 the tests name %vindex, an i32
; constant (0 or 4), and a trailing i32 immarg (0 or 2).
; NOTE(review): the third operand is named %vindex in the tests, yet every
; expected instruction uses the "offen" addressing form - presumably this
; operand is really the per-lane offset; confirm against the intrinsic
; definition.
12declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
13declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
14
15
; No-return f64 buffer fmin: the value returned by the intrinsic is never used.
; All eight check prefixes expect buffer_atomic_fmin_x2 in offen form with a
; zero scalar offset and without the glc modifier. Because this is an
; amdgpu_kernel, the expected code first fetches the arguments with s_load_*
; and copies them into v[0:1] (data) and v2 (offset) before the atomic.
16define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
17; SI-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
18; SI:       ; %bb.0: ; %main_body
19; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
20; SI-NEXT:    s_load_dword s8, s[4:5], 0xf
21; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
22; SI-NEXT:    s_waitcnt lgkmcnt(0)
23; SI-NEXT:    v_mov_b32_e32 v0, s6
24; SI-NEXT:    v_mov_b32_e32 v1, s7
25; SI-NEXT:    v_mov_b32_e32 v2, s8
26; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
27; SI-NEXT:    s_endpgm
28;
29; GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
30; GFX7:       ; %bb.0: ; %main_body
31; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
32; GFX7-NEXT:    s_load_dword s8, s[4:5], 0xf
33; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
34; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
35; GFX7-NEXT:    v_mov_b32_e32 v0, s6
36; GFX7-NEXT:    v_mov_b32_e32 v1, s7
37; GFX7-NEXT:    v_mov_b32_e32 v2, s8
38; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
39; GFX7-NEXT:    s_endpgm
40;
41; GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
42; GFX10:       ; %bb.0: ; %main_body
43; GFX10-NEXT:    s_clause 0x2
44; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
45; GFX10-NEXT:    s_load_dword s8, s[4:5], 0x3c
46; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
47; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
48; GFX10-NEXT:    v_mov_b32_e32 v0, s6
49; GFX10-NEXT:    v_mov_b32_e32 v1, s7
50; GFX10-NEXT:    v_mov_b32_e32 v2, s8
51; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
52; GFX10-NEXT:    s_endpgm
53;
54; GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
55; GFX1030:       ; %bb.0: ; %main_body
56; GFX1030-NEXT:    s_clause 0x2
57; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
58; GFX1030-NEXT:    s_load_dword s8, s[4:5], 0x3c
59; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
60; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
61; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
62; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
63; GFX1030-NEXT:    v_mov_b32_e32 v2, s8
64; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
65; GFX1030-NEXT:    s_endpgm
66;
67; G_SI-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
68; G_SI:       ; %bb.0: ; %main_body
69; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
70; G_SI-NEXT:    s_load_dword s8, s[4:5], 0xf
71; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
72; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
73; G_SI-NEXT:    v_mov_b32_e32 v0, s6
74; G_SI-NEXT:    v_mov_b32_e32 v1, s7
75; G_SI-NEXT:    v_mov_b32_e32 v2, s8
76; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
77; G_SI-NEXT:    s_endpgm
78;
79; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
80; G_GFX7:       ; %bb.0: ; %main_body
81; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
82; G_GFX7-NEXT:    s_load_dword s8, s[4:5], 0xf
83; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
84; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
85; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
86; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
87; G_GFX7-NEXT:    v_mov_b32_e32 v2, s8
88; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
89; G_GFX7-NEXT:    s_endpgm
90;
91; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
92; G_GFX10:       ; %bb.0: ; %main_body
93; G_GFX10-NEXT:    s_clause 0x2
94; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
95; G_GFX10-NEXT:    s_load_dword s8, s[4:5], 0x3c
96; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
97; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
98; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
99; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
100; G_GFX10-NEXT:    v_mov_b32_e32 v2, s8
101; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
102; G_GFX10-NEXT:    s_endpgm
103;
104; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
105; G_GFX1030:       ; %bb.0: ; %main_body
106; G_GFX1030-NEXT:    s_clause 0x2
107; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
108; G_GFX1030-NEXT:    s_load_dword s8, s[4:5], 0x3c
109; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
110; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
111; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
112; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
113; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s8
114; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen
115; G_GFX1030-NEXT:    s_endpgm
116main_body:
117  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
118  ret void
119}
120
; Returning f64 buffer fmin: the intrinsic's result is stored through an undef
; ptr addrspace(3), so the expected buffer_atomic_fmin_x2 carries the glc
; modifier and is followed by s_waitcnt vmcnt(0) and a ds_write_b64 of v[0:1].
; As an amdgpu_ps function the checks use the arguments directly from s[0:3],
; v[0:1] and v2 with no argument loads. The verde and hawaii runs additionally
; expect "s_mov_b32 m0, -1" before the DS write; the gfx10 runs do not.
121define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
122; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
123; SI:       ; %bb.0: ; %main_body
124; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
125; SI-NEXT:    s_mov_b32 m0, -1
126; SI-NEXT:    s_waitcnt vmcnt(0)
127; SI-NEXT:    ds_write_b64 v0, v[0:1]
128; SI-NEXT:    s_endpgm
129;
130; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
131; GFX7:       ; %bb.0: ; %main_body
132; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
133; GFX7-NEXT:    s_mov_b32 m0, -1
134; GFX7-NEXT:    s_waitcnt vmcnt(0)
135; GFX7-NEXT:    ds_write_b64 v0, v[0:1]
136; GFX7-NEXT:    s_endpgm
137;
138; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
139; GFX10:       ; %bb.0: ; %main_body
140; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
141; GFX10-NEXT:    s_waitcnt vmcnt(0)
142; GFX10-NEXT:    ds_write_b64 v0, v[0:1]
143; GFX10-NEXT:    s_endpgm
144;
145; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
146; GFX1030:       ; %bb.0: ; %main_body
147; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
148; GFX1030-NEXT:    s_waitcnt vmcnt(0)
149; GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
150; GFX1030-NEXT:    s_endpgm
151;
152; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
153; G_SI:       ; %bb.0: ; %main_body
154; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
155; G_SI-NEXT:    s_mov_b32 m0, -1
156; G_SI-NEXT:    s_waitcnt vmcnt(0)
157; G_SI-NEXT:    ds_write_b64 v0, v[0:1]
158; G_SI-NEXT:    s_endpgm
159;
160; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
161; G_GFX7:       ; %bb.0: ; %main_body
162; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
163; G_GFX7-NEXT:    s_mov_b32 m0, -1
164; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
165; G_GFX7-NEXT:    ds_write_b64 v0, v[0:1]
166; G_GFX7-NEXT:    s_endpgm
167;
168; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
169; G_GFX10:       ; %bb.0: ; %main_body
170; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
171; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
172; G_GFX10-NEXT:    ds_write_b64 v0, v[0:1]
173; G_GFX10-NEXT:    s_endpgm
174;
175; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
176; G_GFX1030:       ; %bb.0: ; %main_body
177; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 0 offen glc
178; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
179; G_GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
180; G_GFX1030-NEXT:    s_endpgm
181main_body:
182  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
183  store double %ret, ptr addrspace(3) undef
184  ret void
185}
186
; Returning f64 buffer fmin with a non-zero fourth operand (4) and a trailing
; immarg of 2. Every check prefix expects the 4 to appear as the offset field
; after the resource ("s[0:3], 4 offen") and expects both glc and slc on the
; instruction. The result is stored to %out (v3 in the checks) with align 8,
; which shows up as ds_write_b64 v3, v[0:1] after s_waitcnt vmcnt(0).
; NOTE(review): the immarg value 2 presumably selects slc, as the function name
; and checks suggest; glc is also present on the returning tests that pass 0.
187define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex, ptr addrspace(3) %out) {
188; SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
189; SI:       ; %bb.0: ; %main_body
190; SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
191; SI-NEXT:    s_mov_b32 m0, -1
192; SI-NEXT:    s_waitcnt vmcnt(0)
193; SI-NEXT:    ds_write_b64 v3, v[0:1]
194; SI-NEXT:    s_endpgm
195;
196; GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
197; GFX7:       ; %bb.0: ; %main_body
198; GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
199; GFX7-NEXT:    s_mov_b32 m0, -1
200; GFX7-NEXT:    s_waitcnt vmcnt(0)
201; GFX7-NEXT:    ds_write_b64 v3, v[0:1]
202; GFX7-NEXT:    s_endpgm
203;
204; GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
205; GFX10:       ; %bb.0: ; %main_body
206; GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
207; GFX10-NEXT:    s_waitcnt vmcnt(0)
208; GFX10-NEXT:    ds_write_b64 v3, v[0:1]
209; GFX10-NEXT:    s_endpgm
210;
211; GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
212; GFX1030:       ; %bb.0: ; %main_body
213; GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
214; GFX1030-NEXT:    s_waitcnt vmcnt(0)
215; GFX1030-NEXT:    ds_write_b64 v3, v[0:1]
216; GFX1030-NEXT:    s_endpgm
217;
218; G_SI-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
219; G_SI:       ; %bb.0: ; %main_body
220; G_SI-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
221; G_SI-NEXT:    s_mov_b32 m0, -1
222; G_SI-NEXT:    s_waitcnt vmcnt(0)
223; G_SI-NEXT:    ds_write_b64 v3, v[0:1]
224; G_SI-NEXT:    s_endpgm
225;
226; G_GFX7-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
227; G_GFX7:       ; %bb.0: ; %main_body
228; G_GFX7-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
229; G_GFX7-NEXT:    s_mov_b32 m0, -1
230; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
231; G_GFX7-NEXT:    ds_write_b64 v3, v[0:1]
232; G_GFX7-NEXT:    s_endpgm
233;
234; G_GFX10-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
235; G_GFX10:       ; %bb.0: ; %main_body
236; G_GFX10-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
237; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
238; G_GFX10-NEXT:    ds_write_b64 v3, v[0:1]
239; G_GFX10-NEXT:    s_endpgm
240;
241; G_GFX1030-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
242; G_GFX1030:       ; %bb.0: ; %main_body
243; G_GFX1030-NEXT:    buffer_atomic_fmin_x2 v[0:1], v2, s[0:3], 4 offen glc slc
244; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
245; G_GFX1030-NEXT:    ds_write_b64 v3, v[0:1]
246; G_GFX1030-NEXT:    s_endpgm
247main_body:
248  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
249  store double %ret, ptr addrspace(3) %out, align 8
250  ret void
251}
252
; No-return f64 buffer fmax: the value returned by the intrinsic is never used.
; All eight check prefixes expect buffer_atomic_fmax_x2 in offen form with a
; zero scalar offset and without the glc modifier. Because this is an
; amdgpu_kernel, the expected code first fetches the arguments with s_load_*
; and copies them into v[0:1] (data) and v2 (offset) before the atomic.
253define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
254; SI-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
255; SI:       ; %bb.0: ; %main_body
256; SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
257; SI-NEXT:    s_load_dword s8, s[4:5], 0xf
258; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
259; SI-NEXT:    s_waitcnt lgkmcnt(0)
260; SI-NEXT:    v_mov_b32_e32 v0, s6
261; SI-NEXT:    v_mov_b32_e32 v1, s7
262; SI-NEXT:    v_mov_b32_e32 v2, s8
263; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
264; SI-NEXT:    s_endpgm
265;
266; GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
267; GFX7:       ; %bb.0: ; %main_body
268; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
269; GFX7-NEXT:    s_load_dword s8, s[4:5], 0xf
270; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
271; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
272; GFX7-NEXT:    v_mov_b32_e32 v0, s6
273; GFX7-NEXT:    v_mov_b32_e32 v1, s7
274; GFX7-NEXT:    v_mov_b32_e32 v2, s8
275; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
276; GFX7-NEXT:    s_endpgm
277;
278; GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
279; GFX10:       ; %bb.0: ; %main_body
280; GFX10-NEXT:    s_clause 0x2
281; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
282; GFX10-NEXT:    s_load_dword s8, s[4:5], 0x3c
283; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
284; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
285; GFX10-NEXT:    v_mov_b32_e32 v0, s6
286; GFX10-NEXT:    v_mov_b32_e32 v1, s7
287; GFX10-NEXT:    v_mov_b32_e32 v2, s8
288; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
289; GFX10-NEXT:    s_endpgm
290;
291; GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
292; GFX1030:       ; %bb.0: ; %main_body
293; GFX1030-NEXT:    s_clause 0x2
294; GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
295; GFX1030-NEXT:    s_load_dword s8, s[4:5], 0x3c
296; GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
297; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
298; GFX1030-NEXT:    v_mov_b32_e32 v0, s6
299; GFX1030-NEXT:    v_mov_b32_e32 v1, s7
300; GFX1030-NEXT:    v_mov_b32_e32 v2, s8
301; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
302; GFX1030-NEXT:    s_endpgm
303;
304; G_SI-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
305; G_SI:       ; %bb.0: ; %main_body
306; G_SI-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
307; G_SI-NEXT:    s_load_dword s8, s[4:5], 0xf
308; G_SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
309; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
310; G_SI-NEXT:    v_mov_b32_e32 v0, s6
311; G_SI-NEXT:    v_mov_b32_e32 v1, s7
312; G_SI-NEXT:    v_mov_b32_e32 v2, s8
313; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
314; G_SI-NEXT:    s_endpgm
315;
316; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
317; G_GFX7:       ; %bb.0: ; %main_body
318; G_GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0xd
319; G_GFX7-NEXT:    s_load_dword s8, s[4:5], 0xf
320; G_GFX7-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
321; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
322; G_GFX7-NEXT:    v_mov_b32_e32 v0, s6
323; G_GFX7-NEXT:    v_mov_b32_e32 v1, s7
324; G_GFX7-NEXT:    v_mov_b32_e32 v2, s8
325; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
326; G_GFX7-NEXT:    s_endpgm
327;
328; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
329; G_GFX10:       ; %bb.0: ; %main_body
330; G_GFX10-NEXT:    s_clause 0x2
331; G_GFX10-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
332; G_GFX10-NEXT:    s_load_dword s8, s[4:5], 0x3c
333; G_GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
334; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
335; G_GFX10-NEXT:    v_mov_b32_e32 v0, s6
336; G_GFX10-NEXT:    v_mov_b32_e32 v1, s7
337; G_GFX10-NEXT:    v_mov_b32_e32 v2, s8
338; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
339; G_GFX10-NEXT:    s_endpgm
340;
341; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
342; G_GFX1030:       ; %bb.0: ; %main_body
343; G_GFX1030-NEXT:    s_clause 0x2
344; G_GFX1030-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
345; G_GFX1030-NEXT:    s_load_dword s8, s[4:5], 0x3c
346; G_GFX1030-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
347; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
348; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s6
349; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s7
350; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s8
351; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen
352; G_GFX1030-NEXT:    s_endpgm
353main_body:
354  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
355  ret void
356}
357
; Returning f64 buffer fmax: the intrinsic's result is stored through an undef
; ptr addrspace(3), so the expected buffer_atomic_fmax_x2 carries the glc
; modifier and is followed by s_waitcnt vmcnt(0) and a ds_write_b64 of v[0:1].
; As an amdgpu_ps function the checks use the arguments directly from s[0:3],
; v[0:1] and v2 with no argument loads. The verde and hawaii runs additionally
; expect "s_mov_b32 m0, -1" before the DS write; the gfx10 runs do not.
358define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
359; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
360; SI:       ; %bb.0: ; %main_body
361; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
362; SI-NEXT:    s_mov_b32 m0, -1
363; SI-NEXT:    s_waitcnt vmcnt(0)
364; SI-NEXT:    ds_write_b64 v0, v[0:1]
365; SI-NEXT:    s_endpgm
366;
367; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
368; GFX7:       ; %bb.0: ; %main_body
369; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
370; GFX7-NEXT:    s_mov_b32 m0, -1
371; GFX7-NEXT:    s_waitcnt vmcnt(0)
372; GFX7-NEXT:    ds_write_b64 v0, v[0:1]
373; GFX7-NEXT:    s_endpgm
374;
375; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
376; GFX10:       ; %bb.0: ; %main_body
377; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
378; GFX10-NEXT:    s_waitcnt vmcnt(0)
379; GFX10-NEXT:    ds_write_b64 v0, v[0:1]
380; GFX10-NEXT:    s_endpgm
381;
382; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
383; GFX1030:       ; %bb.0: ; %main_body
384; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
385; GFX1030-NEXT:    s_waitcnt vmcnt(0)
386; GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
387; GFX1030-NEXT:    s_endpgm
388;
389; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
390; G_SI:       ; %bb.0: ; %main_body
391; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
392; G_SI-NEXT:    s_mov_b32 m0, -1
393; G_SI-NEXT:    s_waitcnt vmcnt(0)
394; G_SI-NEXT:    ds_write_b64 v0, v[0:1]
395; G_SI-NEXT:    s_endpgm
396;
397; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
398; G_GFX7:       ; %bb.0: ; %main_body
399; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
400; G_GFX7-NEXT:    s_mov_b32 m0, -1
401; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
402; G_GFX7-NEXT:    ds_write_b64 v0, v[0:1]
403; G_GFX7-NEXT:    s_endpgm
404;
405; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
406; G_GFX10:       ; %bb.0: ; %main_body
407; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
408; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
409; G_GFX10-NEXT:    ds_write_b64 v0, v[0:1]
410; G_GFX10-NEXT:    s_endpgm
411;
412; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
413; G_GFX1030:       ; %bb.0: ; %main_body
414; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 0 offen glc
415; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
416; G_GFX1030-NEXT:    ds_write_b64 v0, v[0:1]
417; G_GFX1030-NEXT:    s_endpgm
418main_body:
419  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
420  store double %ret, ptr addrspace(3) undef
421  ret void
422}
423
; Returning f64 buffer fmax with a non-zero fourth operand (4) and a trailing
; immarg of 2. Every check prefix expects "4 offen glc slc" on
; buffer_atomic_fmax_x2, followed by s_waitcnt vmcnt(0) and a ds_write_b64 of
; the result to %out. This function is an amdgpu_kernel, so all arguments
; arrive through a single s_load_dwordx8 and are copied into VGPRs; the gfx1010
; runs expect that load in s[8:15], every other run expects s[0:7].
; NOTE(review): the matching fmin test (raw_ptr_buffer_atomic_min_rtn_f64_off4_slc)
; is amdgpu_ps rather than amdgpu_kernel - confirm the difference is intended.
424define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex, ptr addrspace(3) %out) {
425; SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
426; SI:       ; %bb.0: ; %main_body
427; SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
428; SI-NEXT:    s_mov_b32 m0, -1
429; SI-NEXT:    s_waitcnt lgkmcnt(0)
430; SI-NEXT:    v_mov_b32_e32 v0, s4
431; SI-NEXT:    v_mov_b32_e32 v1, s5
432; SI-NEXT:    v_mov_b32_e32 v2, s6
433; SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
434; SI-NEXT:    v_mov_b32_e32 v2, s7
435; SI-NEXT:    s_waitcnt vmcnt(0)
436; SI-NEXT:    ds_write_b64 v2, v[0:1]
437; SI-NEXT:    s_endpgm
438;
439; GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
440; GFX7:       ; %bb.0: ; %main_body
441; GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
442; GFX7-NEXT:    s_mov_b32 m0, -1
443; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
444; GFX7-NEXT:    v_mov_b32_e32 v0, s4
445; GFX7-NEXT:    v_mov_b32_e32 v1, s5
446; GFX7-NEXT:    v_mov_b32_e32 v2, s6
447; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
448; GFX7-NEXT:    v_mov_b32_e32 v2, s7
449; GFX7-NEXT:    s_waitcnt vmcnt(0)
450; GFX7-NEXT:    ds_write_b64 v2, v[0:1]
451; GFX7-NEXT:    s_endpgm
452;
453; GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
454; GFX10:       ; %bb.0: ; %main_body
455; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
456; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
457; GFX10-NEXT:    v_mov_b32_e32 v0, s12
458; GFX10-NEXT:    v_mov_b32_e32 v1, s13
459; GFX10-NEXT:    v_mov_b32_e32 v2, s14
460; GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[8:11], 4 offen glc slc
461; GFX10-NEXT:    v_mov_b32_e32 v2, s15
462; GFX10-NEXT:    s_waitcnt vmcnt(0)
463; GFX10-NEXT:    ds_write_b64 v2, v[0:1]
464; GFX10-NEXT:    s_endpgm
465;
466; GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
467; GFX1030:       ; %bb.0: ; %main_body
468; GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
469; GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
470; GFX1030-NEXT:    v_mov_b32_e32 v0, s4
471; GFX1030-NEXT:    v_mov_b32_e32 v1, s5
472; GFX1030-NEXT:    v_mov_b32_e32 v2, s6
473; GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
474; GFX1030-NEXT:    v_mov_b32_e32 v2, s7
475; GFX1030-NEXT:    s_waitcnt vmcnt(0)
476; GFX1030-NEXT:    ds_write_b64 v2, v[0:1]
477; GFX1030-NEXT:    s_endpgm
478;
479; G_SI-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
480; G_SI:       ; %bb.0: ; %main_body
481; G_SI-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
482; G_SI-NEXT:    s_mov_b32 m0, -1
483; G_SI-NEXT:    s_waitcnt lgkmcnt(0)
484; G_SI-NEXT:    v_mov_b32_e32 v0, s4
485; G_SI-NEXT:    v_mov_b32_e32 v1, s5
486; G_SI-NEXT:    v_mov_b32_e32 v2, s6
487; G_SI-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
488; G_SI-NEXT:    v_mov_b32_e32 v2, s7
489; G_SI-NEXT:    s_waitcnt vmcnt(0)
490; G_SI-NEXT:    ds_write_b64 v2, v[0:1]
491; G_SI-NEXT:    s_endpgm
492;
493; G_GFX7-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
494; G_GFX7:       ; %bb.0: ; %main_body
495; G_GFX7-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x9
496; G_GFX7-NEXT:    s_mov_b32 m0, -1
497; G_GFX7-NEXT:    s_waitcnt lgkmcnt(0)
498; G_GFX7-NEXT:    v_mov_b32_e32 v0, s4
499; G_GFX7-NEXT:    v_mov_b32_e32 v1, s5
500; G_GFX7-NEXT:    v_mov_b32_e32 v2, s6
501; G_GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
502; G_GFX7-NEXT:    v_mov_b32_e32 v2, s7
503; G_GFX7-NEXT:    s_waitcnt vmcnt(0)
504; G_GFX7-NEXT:    ds_write_b64 v2, v[0:1]
505; G_GFX7-NEXT:    s_endpgm
506;
507; G_GFX10-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
508; G_GFX10:       ; %bb.0: ; %main_body
509; G_GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x24
510; G_GFX10-NEXT:    s_waitcnt lgkmcnt(0)
511; G_GFX10-NEXT:    v_mov_b32_e32 v0, s12
512; G_GFX10-NEXT:    v_mov_b32_e32 v1, s13
513; G_GFX10-NEXT:    v_mov_b32_e32 v2, s14
514; G_GFX10-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[8:11], 4 offen glc slc
515; G_GFX10-NEXT:    v_mov_b32_e32 v2, s15
516; G_GFX10-NEXT:    s_waitcnt vmcnt(0)
517; G_GFX10-NEXT:    ds_write_b64 v2, v[0:1]
518; G_GFX10-NEXT:    s_endpgm
519;
520; G_GFX1030-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
521; G_GFX1030:       ; %bb.0: ; %main_body
522; G_GFX1030-NEXT:    s_load_dwordx8 s[0:7], s[4:5], 0x24
523; G_GFX1030-NEXT:    s_waitcnt lgkmcnt(0)
524; G_GFX1030-NEXT:    v_mov_b32_e32 v0, s4
525; G_GFX1030-NEXT:    v_mov_b32_e32 v1, s5
526; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s6
527; G_GFX1030-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[0:3], 4 offen glc slc
528; G_GFX1030-NEXT:    v_mov_b32_e32 v2, s7
529; G_GFX1030-NEXT:    s_waitcnt vmcnt(0)
530; G_GFX1030-NEXT:    ds_write_b64 v2, v[0:1]
531; G_GFX1030-NEXT:    s_endpgm
532main_body:
533  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
534  store double %ret, ptr addrspace(3) %out, align 8
535  ret void
536}
537