xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll (revision eeac0ffaf46cf9f9b0f680b9940cc4b68a0286d8)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX90A
3; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 -amdgpu-atomic-optimizer-strategy=None | FileCheck %s -check-prefix=GFX940
4
5declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
6declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
7declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
8declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
9declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
10declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
11declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg)
12declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
13declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
14declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
15declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg)
16declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
17declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1)
18
; Kernel calling the raw (<4 x i32> rsrc) buffer f64 atomic fadd; %ret is never used.
; Checks on both targets expect buffer_atomic_add_f64 with `offen` and no glc/sc0 modifier.
19define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
20; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64:
21; GFX90A:       ; %bb.0: ; %main_body
22; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
23; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
24; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
25; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
26; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
27; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
28; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
29; GFX90A-NEXT:    s_endpgm
30;
31; GFX940-LABEL: raw_buffer_atomic_add_noret_f64:
32; GFX940:       ; %bb.0: ; %main_body
33; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
34; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
35; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
36; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
37; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
38; GFX940-NEXT:    v_mov_b32_e32 v2, s8
39; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
40; GFX940-NEXT:    s_endpgm
41main_body:
42  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
43  ret void
44}
45
; amdgpu_ps function (inreg rsrc) calling the raw buffer f64 atomic fadd and using its result.
; %ret is stored through `ptr undef` (presumably only to keep the value live); checks expect
; the atomic to carry `glc` on gfx90a and `sc0` on gfx940.
46define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
47; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64:
48; GFX90A:       ; %bb.0: ; %main_body
49; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
50; GFX90A-NEXT:    s_waitcnt vmcnt(0)
51; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
52; GFX90A-NEXT:    s_endpgm
53;
54; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64:
55; GFX940:       ; %bb.0: ; %main_body
56; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
57; GFX940-NEXT:    s_waitcnt vmcnt(0)
58; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
59; GFX940-NEXT:    s_endpgm
60main_body:
61  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
62  store double %ret, ptr undef
63  ret void
64}
65
; Kernel calling the raw buffer f64 atomic fadd with trailing operands `i32 4, i32 2`
; and storing the result to %out.
; Checks expect `4 offen glc slc` on gfx90a and `4 offen sc0 nt` on gfx940.
66define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
67; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
68; GFX90A:       ; %bb.0: ; %main_body
69; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
70; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
71; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
72; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
73; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
74; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
75; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
76; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen glc slc
77; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
78; GFX90A-NEXT:    s_waitcnt vmcnt(0)
79; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
80; GFX90A-NEXT:    s_endpgm
81;
82; GFX940-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
83; GFX940:       ; %bb.0: ; %main_body
84; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
85; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
86; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
87; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
88; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
89; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
90; GFX940-NEXT:    v_mov_b32_e32 v2, s10
91; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
92; GFX940-NEXT:    v_mov_b32_e32 v2, 0
93; GFX940-NEXT:    s_waitcnt vmcnt(0)
94; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
95; GFX940-NEXT:    s_endpgm
96main_body:
97  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
98  store double %ret, ptr addrspace(1) %out, align 8
99  ret void
100}
101
; Kernel calling the raw.ptr (ptr addrspace(8) rsrc) buffer f64 atomic fadd; %ret is never used.
; Checks on both targets expect buffer_atomic_add_f64 with `offen` and no glc/sc0 modifier.
102define amdgpu_kernel void @raw_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
103; GFX90A-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
104; GFX90A:       ; %bb.0: ; %main_body
105; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
106; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
107; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
108; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
109; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
110; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
111; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
112; GFX90A-NEXT:    s_endpgm
113;
114; GFX940-LABEL: raw_ptr_buffer_atomic_add_noret_f64:
115; GFX940:       ; %bb.0: ; %main_body
116; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
117; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
118; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
119; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
120; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
121; GFX940-NEXT:    v_mov_b32_e32 v2, s8
122; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen
123; GFX940-NEXT:    s_endpgm
124main_body:
125  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
126  ret void
127}
128
; amdgpu_ps function (inreg rsrc) calling the raw.ptr buffer f64 atomic fadd and using its result.
; %ret is stored through `ptr undef` (presumably only to keep the value live); checks expect
; the atomic to carry `glc` on gfx90a and `sc0` on gfx940.
129define amdgpu_ps void @raw_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
130; GFX90A-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
131; GFX90A:       ; %bb.0: ; %main_body
132; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
133; GFX90A-NEXT:    s_waitcnt vmcnt(0)
134; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
135; GFX90A-NEXT:    s_endpgm
136;
137; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64:
138; GFX940:       ; %bb.0: ; %main_body
139; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen sc0
140; GFX940-NEXT:    s_waitcnt vmcnt(0)
141; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
142; GFX940-NEXT:    s_endpgm
143main_body:
144  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
145  store double %ret, ptr undef
146  ret void
147}
148
; Kernel calling the raw.ptr buffer f64 atomic fadd with trailing operands `i32 4, i32 2`
; and storing the result to %out.
; Checks expect `4 offen glc slc` on gfx90a and `4 offen sc0 nt` on gfx940.
149define amdgpu_kernel void @raw_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
150; GFX90A-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
151; GFX90A:       ; %bb.0: ; %main_body
152; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
153; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
154; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
155; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
156; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
157; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
158; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
159; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen glc slc
160; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
161; GFX90A-NEXT:    s_waitcnt vmcnt(0)
162; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
163; GFX90A-NEXT:    s_endpgm
164;
165; GFX940-LABEL: raw_ptr_buffer_atomic_add_rtn_f64_off4_slc:
166; GFX940:       ; %bb.0: ; %main_body
167; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
168; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
169; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
170; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
171; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
172; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
173; GFX940-NEXT:    v_mov_b32_e32 v2, s10
174; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
175; GFX940-NEXT:    v_mov_b32_e32 v2, 0
176; GFX940-NEXT:    s_waitcnt vmcnt(0)
177; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
178; GFX940-NEXT:    s_endpgm
179main_body:
180  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
181  store double %ret, ptr addrspace(1) %out, align 8
182  ret void
183}
184
; Kernel calling the struct (<4 x i32> rsrc) buffer f64 atomic fadd; %ret is never used.
; Checks on both targets expect buffer_atomic_add_f64 with `idxen` and no glc/sc0 modifier.
185define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
186; GFX90A-LABEL: struct_buffer_atomic_add_noret_f64:
187; GFX90A:       ; %bb.0: ; %main_body
188; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
189; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
190; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
191; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
192; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
193; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
194; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
195; GFX90A-NEXT:    s_endpgm
196;
197; GFX940-LABEL: struct_buffer_atomic_add_noret_f64:
198; GFX940:       ; %bb.0: ; %main_body
199; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
200; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
201; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
202; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
203; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
204; GFX940-NEXT:    v_mov_b32_e32 v2, s8
205; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
206; GFX940-NEXT:    s_endpgm
207main_body:
208  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
209  ret void
210}
211
; amdgpu_ps function (inreg rsrc) calling the struct buffer f64 atomic fadd and using its result.
; %ret is stored through `ptr undef` (presumably only to keep the value live); checks expect
; the atomic to carry `idxen glc` on gfx90a and `idxen sc0` on gfx940.
212define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
213; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64:
214; GFX90A:       ; %bb.0: ; %main_body
215; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen glc
216; GFX90A-NEXT:    s_waitcnt vmcnt(0)
217; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
218; GFX90A-NEXT:    s_endpgm
219;
220; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64:
221; GFX940:       ; %bb.0: ; %main_body
222; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
223; GFX940-NEXT:    s_waitcnt vmcnt(0)
224; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
225; GFX940-NEXT:    s_endpgm
226main_body:
227  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
228  store double %ret, ptr undef
229  ret void
230}
231
; Kernel calling the struct buffer f64 atomic fadd with trailing operands `i32 4, i32 0, i32 2`
; and storing the result to %out.
; Checks expect `0 idxen offset:4 glc slc` on gfx90a and `0 idxen offset:4 sc0 nt` on gfx940.
232define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
233; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
234; GFX90A:       ; %bb.0: ; %main_body
235; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
236; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
237; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
238; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
239; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
240; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
241; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
242; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc
243; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
244; GFX90A-NEXT:    s_waitcnt vmcnt(0)
245; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
246; GFX90A-NEXT:    s_endpgm
247;
248; GFX940-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
249; GFX940:       ; %bb.0: ; %main_body
250; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
251; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
252; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
253; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
254; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
255; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
256; GFX940-NEXT:    v_mov_b32_e32 v2, s10
257; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
258; GFX940-NEXT:    v_mov_b32_e32 v2, 0
259; GFX940-NEXT:    s_waitcnt vmcnt(0)
260; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
261; GFX940-NEXT:    s_endpgm
262main_body:
263  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
264  store double %ret, ptr addrspace(1) %out, align 8
265  ret void
266}
267
; Kernel calling the struct.ptr (ptr addrspace(8) rsrc) buffer f64 atomic fadd; %ret is never used.
; Checks on both targets expect buffer_atomic_add_f64 with `idxen` and no glc/sc0 modifier.
268define amdgpu_kernel void @struct_ptr_buffer_atomic_add_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
269; GFX90A-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
270; GFX90A:       ; %bb.0: ; %main_body
271; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
272; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
273; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
274; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
275; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
276; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
277; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
278; GFX90A-NEXT:    s_endpgm
279;
280; GFX940-LABEL: struct_ptr_buffer_atomic_add_noret_f64:
281; GFX940:       ; %bb.0: ; %main_body
282; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
283; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
284; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
285; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
286; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
287; GFX940-NEXT:    v_mov_b32_e32 v2, s8
288; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen
289; GFX940-NEXT:    s_endpgm
290main_body:
291  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
292  ret void
293}
294
; amdgpu_ps function (inreg rsrc) calling the struct.ptr buffer f64 atomic fadd and using its result.
; %ret is stored through `ptr undef` (presumably only to keep the value live); checks expect
; the atomic to carry `idxen glc` on gfx90a and `idxen sc0` on gfx940.
295define amdgpu_ps void @struct_ptr_buffer_atomic_add_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
296; GFX90A-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
297; GFX90A:       ; %bb.0: ; %main_body
298; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen glc
299; GFX90A-NEXT:    s_waitcnt vmcnt(0)
300; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
301; GFX90A-NEXT:    s_endpgm
302;
303; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64:
304; GFX940:       ; %bb.0: ; %main_body
305; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen sc0
306; GFX940-NEXT:    s_waitcnt vmcnt(0)
307; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
308; GFX940-NEXT:    s_endpgm
309main_body:
310  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
311  store double %ret, ptr undef
312  ret void
313}
314
; Kernel calling the struct.ptr buffer f64 atomic fadd with trailing operands `i32 4, i32 0, i32 2`
; and storing the result to %out.
; Checks expect `0 idxen offset:4 glc slc` on gfx90a and `0 idxen offset:4 sc0 nt` on gfx940.
315define amdgpu_kernel void @struct_ptr_buffer_atomic_add_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
316; GFX90A-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
317; GFX90A:       ; %bb.0: ; %main_body
318; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
319; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
320; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
321; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
322; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
323; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
324; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
325; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc
326; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
327; GFX90A-NEXT:    s_waitcnt vmcnt(0)
328; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
329; GFX90A-NEXT:    s_endpgm
330;
331; GFX940-LABEL: struct_ptr_buffer_atomic_add_rtn_f64_off4_slc:
332; GFX940:       ; %bb.0: ; %main_body
333; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
334; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
335; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
336; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
337; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
338; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
339; GFX940-NEXT:    v_mov_b32_e32 v2, s10
340; GFX940-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
341; GFX940-NEXT:    v_mov_b32_e32 v2, 0
342; GFX940-NEXT:    s_waitcnt vmcnt(0)
343; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
344; GFX940-NEXT:    s_endpgm
345main_body:
346  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
347  store double %ret, ptr addrspace(1) %out, align 8
348  ret void
349}
350
; Kernel calling the raw (<4 x i32> rsrc) buffer f64 atomic fmin; %ret is never used.
; Checks on both targets expect buffer_atomic_min_f64 with `offen` and no glc/sc0 modifier.
351define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
352; GFX90A-LABEL: raw_buffer_atomic_min_noret_f64:
353; GFX90A:       ; %bb.0: ; %main_body
354; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
355; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
356; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
357; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
358; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
359; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
360; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
361; GFX90A-NEXT:    s_endpgm
362;
363; GFX940-LABEL: raw_buffer_atomic_min_noret_f64:
364; GFX940:       ; %bb.0: ; %main_body
365; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
366; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
367; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
368; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
369; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
370; GFX940-NEXT:    v_mov_b32_e32 v2, s8
371; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
372; GFX940-NEXT:    s_endpgm
373main_body:
374  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
375  ret void
376}
377
; amdgpu_ps function (inreg rsrc) calling the raw buffer f64 atomic fmin and using its result.
; %ret is stored through `ptr undef` (presumably only to keep the value live); checks expect
; the atomic to carry `glc` on gfx90a and `sc0` on gfx940.
378define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
379; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64:
380; GFX90A:       ; %bb.0: ; %main_body
381; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc
382; GFX90A-NEXT:    s_waitcnt vmcnt(0)
383; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
384; GFX90A-NEXT:    s_endpgm
385;
386; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64:
387; GFX940:       ; %bb.0: ; %main_body
388; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
389; GFX940-NEXT:    s_waitcnt vmcnt(0)
390; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
391; GFX940-NEXT:    s_endpgm
392main_body:
393  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
394  store double %ret, ptr undef
395  ret void
396}
397
; Kernel calling the raw buffer f64 atomic fmin with trailing operands `i32 4, i32 2`
; and storing the result to %out.
; Checks expect `4 offen glc slc` on gfx90a and `4 offen sc0 nt` on gfx940.
398define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
399; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
400; GFX90A:       ; %bb.0: ; %main_body
401; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
402; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
403; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
404; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
405; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
406; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
407; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
408; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen glc slc
409; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
410; GFX90A-NEXT:    s_waitcnt vmcnt(0)
411; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
412; GFX90A-NEXT:    s_endpgm
413;
414; GFX940-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
415; GFX940:       ; %bb.0: ; %main_body
416; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
417; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
418; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
419; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
420; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
421; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
422; GFX940-NEXT:    v_mov_b32_e32 v2, s10
423; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
424; GFX940-NEXT:    v_mov_b32_e32 v2, 0
425; GFX940-NEXT:    s_waitcnt vmcnt(0)
426; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
427; GFX940-NEXT:    s_endpgm
428main_body:
429  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
430  store double %ret, ptr addrspace(1) %out, align 8
431  ret void
432}
433
; Kernel calling the raw.ptr (ptr addrspace(8) rsrc) buffer f64 atomic fmin; %ret is never used.
; Checks on both targets expect buffer_atomic_min_f64 with `offen` and no glc/sc0 modifier.
434define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
435; GFX90A-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
436; GFX90A:       ; %bb.0: ; %main_body
437; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
438; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
439; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
440; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
441; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
442; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
443; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
444; GFX90A-NEXT:    s_endpgm
445;
446; GFX940-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
447; GFX940:       ; %bb.0: ; %main_body
448; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
449; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
450; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
451; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
452; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
453; GFX940-NEXT:    v_mov_b32_e32 v2, s8
454; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen
455; GFX940-NEXT:    s_endpgm
456main_body:
457  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
458  ret void
459}
460
; amdgpu_ps function (inreg rsrc) calling the raw.ptr buffer f64 atomic fmin and using its result.
; %ret is stored through `ptr undef` (presumably only to keep the value live); checks expect
; the atomic to carry `glc` on gfx90a and `sc0` on gfx940.
461define amdgpu_ps void @raw_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
462; GFX90A-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
463; GFX90A:       ; %bb.0: ; %main_body
464; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc
465; GFX90A-NEXT:    s_waitcnt vmcnt(0)
466; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
467; GFX90A-NEXT:    s_endpgm
468;
469; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64:
470; GFX940:       ; %bb.0: ; %main_body
471; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen sc0
472; GFX940-NEXT:    s_waitcnt vmcnt(0)
473; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
474; GFX940-NEXT:    s_endpgm
475main_body:
476  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
477  store double %ret, ptr undef
478  ret void
479}
480
; Kernel calling the raw.ptr buffer f64 atomic fmin with trailing operands `i32 4, i32 2`
; and storing the result to %out.
; Checks expect `4 offen glc slc` on gfx90a and `4 offen sc0 nt` on gfx940.
481define amdgpu_kernel void @raw_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
482; GFX90A-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
483; GFX90A:       ; %bb.0: ; %main_body
484; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
485; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
486; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
487; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
488; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
489; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
490; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
491; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen glc slc
492; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
493; GFX90A-NEXT:    s_waitcnt vmcnt(0)
494; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
495; GFX90A-NEXT:    s_endpgm
496;
497; GFX940-LABEL: raw_ptr_buffer_atomic_min_rtn_f64_off4_slc:
498; GFX940:       ; %bb.0: ; %main_body
499; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
500; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
501; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
502; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
503; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
504; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
505; GFX940-NEXT:    v_mov_b32_e32 v2, s10
506; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
507; GFX940-NEXT:    v_mov_b32_e32 v2, 0
508; GFX940-NEXT:    s_waitcnt vmcnt(0)
509; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
510; GFX940-NEXT:    s_endpgm
511main_body:
512  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
513  store double %ret, ptr addrspace(1) %out, align 8
514  ret void
515}
516
; Kernel calling the struct (<4 x i32> rsrc) buffer f64 atomic fmin; %ret is never used.
; Checks on both targets expect buffer_atomic_min_f64 with `idxen` and no glc/sc0 modifier.
517define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
518; GFX90A-LABEL: struct_buffer_atomic_min_noret_f64:
519; GFX90A:       ; %bb.0: ; %main_body
520; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
521; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
522; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
523; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
524; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
525; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
526; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
527; GFX90A-NEXT:    s_endpgm
528;
529; GFX940-LABEL: struct_buffer_atomic_min_noret_f64:
530; GFX940:       ; %bb.0: ; %main_body
531; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
532; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
533; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
534; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
535; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
536; GFX940-NEXT:    v_mov_b32_e32 v2, s8
537; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
538; GFX940-NEXT:    s_endpgm
539main_body:
540  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
541  ret void
542}
543
; amdgpu_ps function (inreg rsrc) calling the struct buffer f64 atomic fmin and using its result.
; %ret is stored through `ptr undef` (presumably only to keep the value live); checks expect
; the atomic to carry `idxen glc` on gfx90a and `idxen sc0` on gfx940.
544define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
545; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64:
546; GFX90A:       ; %bb.0: ; %main_body
547; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen glc
548; GFX90A-NEXT:    s_waitcnt vmcnt(0)
549; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
550; GFX90A-NEXT:    s_endpgm
551;
552; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64:
553; GFX940:       ; %bb.0: ; %main_body
554; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
555; GFX940-NEXT:    s_waitcnt vmcnt(0)
556; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
557; GFX940-NEXT:    s_endpgm
558main_body:
559  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
560  store double %ret, ptr undef
561  ret void
562}
563
; Kernel calling the struct buffer f64 atomic fmin with trailing operands `i32 4, i32 0, i32 2`
; and storing the result to %out.
; Checks expect `0 idxen offset:4 glc slc` on gfx90a and `0 idxen offset:4 sc0 nt` on gfx940.
564define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
565; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
566; GFX90A:       ; %bb.0: ; %main_body
567; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
568; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
569; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
570; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
571; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
572; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
573; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
574; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc
575; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
576; GFX90A-NEXT:    s_waitcnt vmcnt(0)
577; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
578; GFX90A-NEXT:    s_endpgm
579;
580; GFX940-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
581; GFX940:       ; %bb.0: ; %main_body
582; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
583; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
584; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
585; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
586; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
587; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
588; GFX940-NEXT:    v_mov_b32_e32 v2, s10
589; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
590; GFX940-NEXT:    v_mov_b32_e32 v2, 0
591; GFX940-NEXT:    s_waitcnt vmcnt(0)
592; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
593; GFX940-NEXT:    s_endpgm
594main_body:
595  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
596  store double %ret, ptr addrspace(1) %out, align 8
597  ret void
598}
599
; Kernel calling the struct.ptr (ptr addrspace(8) rsrc) buffer f64 atomic fmin; %ret is never used.
; Checks on both targets expect buffer_atomic_min_f64 with `idxen` and no glc/sc0 modifier.
600define amdgpu_kernel void @struct_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
601; GFX90A-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
602; GFX90A:       ; %bb.0: ; %main_body
603; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
604; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
605; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
606; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
607; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
608; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
609; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
610; GFX90A-NEXT:    s_endpgm
611;
612; GFX940-LABEL: struct_ptr_buffer_atomic_min_noret_f64:
613; GFX940:       ; %bb.0: ; %main_body
614; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
615; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
616; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
617; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
618; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
619; GFX940-NEXT:    v_mov_b32_e32 v2, s8
620; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen
621; GFX940-NEXT:    s_endpgm
622main_body:
623  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
624  ret void
625}
626
; amdgpu_ps function (inreg rsrc) calling the struct.ptr buffer f64 atomic fmin and using its result.
; %ret is stored through `ptr undef` (presumably only to keep the value live); checks expect
; the atomic to carry `idxen glc` on gfx90a and `idxen sc0` on gfx940.
627define amdgpu_ps void @struct_ptr_buffer_atomic_min_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
628; GFX90A-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
629; GFX90A:       ; %bb.0: ; %main_body
630; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen glc
631; GFX90A-NEXT:    s_waitcnt vmcnt(0)
632; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
633; GFX90A-NEXT:    s_endpgm
634;
635; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64:
636; GFX940:       ; %bb.0: ; %main_body
637; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen sc0
638; GFX940-NEXT:    s_waitcnt vmcnt(0)
639; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
640; GFX940-NEXT:    s_endpgm
641main_body:
642  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
643  store double %ret, ptr undef
644  ret void
645}
646
; struct.ptr buffer fmin on f64 with i32 4 in the operand after %vindex and 2 as
; the final immediate; the result is stored to %out. The checks expect offset:4
; plus "glc slc" on gfx90a and "sc0 nt" on gfx940.
647define amdgpu_kernel void @struct_ptr_buffer_atomic_min_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
648; GFX90A-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
649; GFX90A:       ; %bb.0: ; %main_body
650; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
651; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
652; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
653; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
654; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
655; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
656; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
657; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc
658; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
659; GFX90A-NEXT:    s_waitcnt vmcnt(0)
660; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
661; GFX90A-NEXT:    s_endpgm
662;
663; GFX940-LABEL: struct_ptr_buffer_atomic_min_rtn_f64_off4_slc:
664; GFX940:       ; %bb.0: ; %main_body
665; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
666; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
667; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
668; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
669; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
670; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
671; GFX940-NEXT:    v_mov_b32_e32 v2, s10
672; GFX940-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
673; GFX940-NEXT:    v_mov_b32_e32 v2, 0
674; GFX940-NEXT:    s_waitcnt vmcnt(0)
675; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
676; GFX940-NEXT:    s_endpgm
677main_body:
678  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
679  store double %ret, ptr addrspace(1) %out, align 8
680  ret void
681}
682
; raw buffer fmax on f64 (<4 x i32> resource) in a kernel, result unused.
; Both targets are expected to emit buffer_atomic_max_f64 with offen and no
; glc/sc0 modifier. Note that the argument named %vindex is what ends up as the
; offen VGPR in the checked instruction.
683define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
684; GFX90A-LABEL: raw_buffer_atomic_max_noret_f64:
685; GFX90A:       ; %bb.0: ; %main_body
686; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
687; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
688; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
689; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
690; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
691; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
692; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
693; GFX90A-NEXT:    s_endpgm
694;
695; GFX940-LABEL: raw_buffer_atomic_max_noret_f64:
696; GFX940:       ; %bb.0: ; %main_body
697; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
698; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
699; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
700; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
701; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
702; GFX940-NEXT:    v_mov_b32_e32 v2, s8
703; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
704; GFX940-NEXT:    s_endpgm
705main_body:
706  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
707  ret void
708}
709
; raw buffer fmax on f64 in an amdgpu_ps function (inreg <4 x i32> resource);
; the result is stored to an undef pointer, so the checks expect the returning
; form of the instruction: offen glc on gfx90a and offen sc0 on gfx940.
710define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
711; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64:
712; GFX90A:       ; %bb.0: ; %main_body
713; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc
714; GFX90A-NEXT:    s_waitcnt vmcnt(0)
715; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
716; GFX90A-NEXT:    s_endpgm
717;
718; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64:
719; GFX940:       ; %bb.0: ; %main_body
720; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
721; GFX940-NEXT:    s_waitcnt vmcnt(0)
722; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
723; GFX940-NEXT:    s_endpgm
724main_body:
725  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
726  store double %ret, ptr undef
727  ret void
728}
729
; raw buffer fmax on f64 with i32 4 in the operand after %vindex and 2 as the
; final immediate; the result is stored to %out. In the checks the 4 appears as
; the literal operand after the resource ("s[0:3], 4 offen"), not as offset:4,
; together with "glc slc" on gfx90a and "sc0 nt" on gfx940.
730define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
731; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
732; GFX90A:       ; %bb.0: ; %main_body
733; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
734; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
735; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
736; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
737; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
738; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
739; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
740; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen glc slc
741; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
742; GFX90A-NEXT:    s_waitcnt vmcnt(0)
743; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
744; GFX90A-NEXT:    s_endpgm
745;
746; GFX940-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
747; GFX940:       ; %bb.0: ; %main_body
748; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
749; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
750; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
751; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
752; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
753; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
754; GFX940-NEXT:    v_mov_b32_e32 v2, s10
755; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
756; GFX940-NEXT:    v_mov_b32_e32 v2, 0
757; GFX940-NEXT:    s_waitcnt vmcnt(0)
758; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
759; GFX940-NEXT:    s_endpgm
760main_body:
761  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
762  store double %ret, ptr addrspace(1) %out, align 8
763  ret void
764}
765
; raw.ptr buffer fmax on f64 (ptr addrspace(8) resource) in a kernel, result
; unused. The expected code is the same as for the <4 x i32> raw variant:
; buffer_atomic_max_f64 with offen and no glc/sc0 modifier.
766define amdgpu_kernel void @raw_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
767; GFX90A-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
768; GFX90A:       ; %bb.0: ; %main_body
769; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
770; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
771; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
772; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
773; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
774; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
775; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
776; GFX90A-NEXT:    s_endpgm
777;
778; GFX940-LABEL: raw_ptr_buffer_atomic_max_noret_f64:
779; GFX940:       ; %bb.0: ; %main_body
780; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
781; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
782; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
783; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
784; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
785; GFX940-NEXT:    v_mov_b32_e32 v2, s8
786; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen
787; GFX940-NEXT:    s_endpgm
788main_body:
789  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
790  ret void
791}
792
; raw.ptr buffer fmax on f64 in an amdgpu_ps function (inreg ptr addrspace(8)
; resource); the result is stored to an undef pointer, so the checks expect the
; returning form: offen glc on gfx90a and offen sc0 on gfx940.
793define amdgpu_ps void @raw_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
794; GFX90A-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
795; GFX90A:       ; %bb.0: ; %main_body
796; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc
797; GFX90A-NEXT:    s_waitcnt vmcnt(0)
798; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
799; GFX90A-NEXT:    s_endpgm
800;
801; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64:
802; GFX940:       ; %bb.0: ; %main_body
803; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen sc0
804; GFX940-NEXT:    s_waitcnt vmcnt(0)
805; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
806; GFX940-NEXT:    s_endpgm
807main_body:
808  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0)
809  store double %ret, ptr undef
810  ret void
811}
812
; raw.ptr buffer fmax on f64 with i32 4 in the operand after %vindex and 2 as
; the final immediate; the result is stored to %out. The checks expect the 4 as
; the literal operand after the resource ("s[0:3], 4 offen") together with
; "glc slc" on gfx90a and "sc0 nt" on gfx940.
813define amdgpu_kernel void @raw_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
814; GFX90A-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
815; GFX90A:       ; %bb.0: ; %main_body
816; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
817; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
818; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
819; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
820; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
821; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
822; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
823; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen glc slc
824; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
825; GFX90A-NEXT:    s_waitcnt vmcnt(0)
826; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
827; GFX90A-NEXT:    s_endpgm
828;
829; GFX940-LABEL: raw_ptr_buffer_atomic_max_rtn_f64_off4_slc:
830; GFX940:       ; %bb.0: ; %main_body
831; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
832; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
833; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
834; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
835; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
836; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
837; GFX940-NEXT:    v_mov_b32_e32 v2, s10
838; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 4 offen sc0 nt
839; GFX940-NEXT:    v_mov_b32_e32 v2, 0
840; GFX940-NEXT:    s_waitcnt vmcnt(0)
841; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
842; GFX940-NEXT:    s_endpgm
843main_body:
844  %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 2)
845  store double %ret, ptr addrspace(1) %out, align 8
846  ret void
847}
848
; struct buffer fmax on f64 (<4 x i32> resource) in a kernel, result unused.
; Both targets are expected to emit buffer_atomic_max_f64 with idxen and without
; a return-requesting modifier (no glc on gfx90a, no sc0 on gfx940).
849define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
850; GFX90A-LABEL: struct_buffer_atomic_max_noret_f64:
851; GFX90A:       ; %bb.0: ; %main_body
852; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
853; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
854; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
855; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
856; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
857; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
858; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
859; GFX90A-NEXT:    s_endpgm
860;
861; GFX940-LABEL: struct_buffer_atomic_max_noret_f64:
862; GFX940:       ; %bb.0: ; %main_body
863; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
864; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
865; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
866; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
867; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
868; GFX940-NEXT:    v_mov_b32_e32 v2, s8
869; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
870; GFX940-NEXT:    s_endpgm
871main_body:
872  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
873  ret void
874}
875
; struct buffer fmax on f64 in an amdgpu_ps function (inreg <4 x i32> resource);
; the result is stored to an undef pointer, so the checks expect the returning
; form: idxen glc on gfx90a and idxen sc0 on gfx940.
876define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
877; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64:
878; GFX90A:       ; %bb.0: ; %main_body
879; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen glc
880; GFX90A-NEXT:    s_waitcnt vmcnt(0)
881; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
882; GFX90A-NEXT:    s_endpgm
883;
884; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64:
885; GFX940:       ; %bb.0: ; %main_body
886; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
887; GFX940-NEXT:    s_waitcnt vmcnt(0)
888; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
889; GFX940-NEXT:    s_endpgm
890main_body:
891  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
892  store double %ret, ptr undef
893  ret void
894}
895
; struct buffer fmax on f64 with i32 4 in the operand after %vindex and 2 as the
; final immediate; the result is stored to %out. The checks expect offset:4
; plus "glc slc" on gfx90a and "sc0 nt" on gfx940.
896define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
897; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
898; GFX90A:       ; %bb.0: ; %main_body
899; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
900; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
901; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
902; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
903; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
904; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
905; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
906; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc
907; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
908; GFX90A-NEXT:    s_waitcnt vmcnt(0)
909; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
910; GFX90A-NEXT:    s_endpgm
911;
912; GFX940-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
913; GFX940:       ; %bb.0: ; %main_body
914; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
915; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
916; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
917; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
918; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
919; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
920; GFX940-NEXT:    v_mov_b32_e32 v2, s10
921; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
922; GFX940-NEXT:    v_mov_b32_e32 v2, 0
923; GFX940-NEXT:    s_waitcnt vmcnt(0)
924; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
925; GFX940-NEXT:    s_endpgm
926main_body:
927  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
928  store double %ret, ptr addrspace(1) %out, align 8
929  ret void
930}
931
; struct.ptr buffer fmax on f64 (ptr addrspace(8) resource) in a kernel, result
; unused. The expected code is the same as for the <4 x i32> struct variant:
; buffer_atomic_max_f64 with idxen and no glc/sc0 modifier.
932define amdgpu_kernel void @struct_ptr_buffer_atomic_max_noret_f64(ptr addrspace(8) %rsrc, double %data, i32 %vindex) {
933; GFX90A-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
934; GFX90A:       ; %bb.0: ; %main_body
935; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
936; GFX90A-NEXT:    s_load_dword s8, s[4:5], 0x3c
937; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
938; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
939; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
940; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
941; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
942; GFX90A-NEXT:    s_endpgm
943;
944; GFX940-LABEL: struct_ptr_buffer_atomic_max_noret_f64:
945; GFX940:       ; %bb.0: ; %main_body
946; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
947; GFX940-NEXT:    s_load_dword s8, s[4:5], 0x3c
948; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
949; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
950; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
951; GFX940-NEXT:    v_mov_b32_e32 v2, s8
952; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen
953; GFX940-NEXT:    s_endpgm
954main_body:
955  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
956  ret void
957}
958
; struct.ptr buffer fmax on f64 in an amdgpu_ps function (inreg ptr addrspace(8)
; resource); the result is stored to an undef pointer, so the checks expect the
; returning form: idxen glc on gfx90a and idxen sc0 on gfx940.
959define amdgpu_ps void @struct_ptr_buffer_atomic_max_rtn_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
960; GFX90A-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
961; GFX90A:       ; %bb.0: ; %main_body
962; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen glc
963; GFX90A-NEXT:    s_waitcnt vmcnt(0)
964; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
965; GFX90A-NEXT:    s_endpgm
966;
967; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64:
968; GFX940:       ; %bb.0: ; %main_body
969; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen sc0
970; GFX940-NEXT:    s_waitcnt vmcnt(0)
971; GFX940-NEXT:    flat_store_dwordx2 v[0:1], v[0:1] sc0 sc1
972; GFX940-NEXT:    s_endpgm
973main_body:
974  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
975  store double %ret, ptr undef
976  ret void
977}
978
; struct.ptr buffer fmax on f64 with i32 4 in the operand after %vindex and 2 as
; the final immediate; the result is stored to %out. The checks expect offset:4
; plus "glc slc" on gfx90a and "sc0 nt" on gfx940.
979define amdgpu_kernel void @struct_ptr_buffer_atomic_max_rtn_f64_off4_slc(ptr addrspace(8) %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) {
980; GFX90A-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
981; GFX90A:       ; %bb.0: ; %main_body
982; GFX90A-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
983; GFX90A-NEXT:    s_load_dword s10, s[4:5], 0x3c
984; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
985; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
986; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
987; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[6:7], s[6:7] op_sel:[0,1]
988; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
989; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 glc slc
990; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
991; GFX90A-NEXT:    s_waitcnt vmcnt(0)
992; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
993; GFX90A-NEXT:    s_endpgm
994;
995; GFX940-LABEL: struct_ptr_buffer_atomic_max_rtn_f64_off4_slc:
996; GFX940:       ; %bb.0: ; %main_body
997; GFX940-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x34
998; GFX940-NEXT:    s_load_dword s10, s[4:5], 0x3c
999; GFX940-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
1000; GFX940-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
1001; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1002; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[6:7]
1003; GFX940-NEXT:    v_mov_b32_e32 v2, s10
1004; GFX940-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen offset:4 sc0 nt
1005; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1006; GFX940-NEXT:    s_waitcnt vmcnt(0)
1007; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9] sc0 sc1
1008; GFX940-NEXT:    s_endpgm
1009main_body:
1010  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
1011  store double %ret, ptr addrspace(1) %out, align 8
1012  ret void
1013}
1014
; Global (addrspace 1) atomicrmw fadd of 4.0, seq_cst with no syncscope, result
; unused, tagged !amdgpu.no.fine.grained.memory. Both targets are expected to
; select a native global_atomic_add_f64 bracketed by L2 writeback/invalidate
; (buffer_wbl2 ... buffer_invl2 / buffer_inv sc0 sc1); gfx940 also puts sc1 on
; the atomic. 0x40100000 in the gfx90a checks is the high dword of double 4.0.
; NOTE(review): attribute group #1 and metadata !0 are defined outside this view.
1015define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 {
1016; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat:
1017; GFX90A:       ; %bb.0: ; %main_body
1018; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1019; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1020; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1021; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1022; GFX90A-NEXT:    buffer_wbl2
1023; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1024; GFX90A-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
1025; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1026; GFX90A-NEXT:    buffer_invl2
1027; GFX90A-NEXT:    buffer_wbinvl1_vol
1028; GFX90A-NEXT:    s_endpgm
1029;
1030; GFX940-LABEL: global_atomic_fadd_f64_noret_pat:
1031; GFX940:       ; %bb.0: ; %main_body
1032; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1033; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1034; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1035; GFX940-NEXT:    buffer_wbl2 sc0 sc1
1036; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1037; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] sc1
1038; GFX940-NEXT:    s_waitcnt vmcnt(0)
1039; GFX940-NEXT:    buffer_inv sc0 sc1
1040; GFX940-NEXT:    s_endpgm
1041main_body:
1042  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
1043  ret void
1044}
1045
; Same global fadd of 4.0 as the no-syncscope variant, but with
; syncscope("agent"). The expected code drops the L2 writeback/invalidate pair
; on gfx90a (only buffer_wbinvl1_vol remains) and narrows gfx940 to
; buffer_wbl2 sc1 / buffer_inv sc1 with no sc1 bit on the atomic itself.
1046define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) #1 {
1047; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent:
1048; GFX90A:       ; %bb.0: ; %main_body
1049; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1050; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1051; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1052; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1053; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1054; GFX90A-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
1055; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1056; GFX90A-NEXT:    buffer_wbinvl1_vol
1057; GFX90A-NEXT:    s_endpgm
1058;
1059; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent:
1060; GFX940:       ; %bb.0: ; %main_body
1061; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1062; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1063; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1064; GFX940-NEXT:    buffer_wbl2 sc1
1065; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1066; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
1067; GFX940-NEXT:    s_waitcnt vmcnt(0)
1068; GFX940-NEXT:    buffer_inv sc1
1069; GFX940-NEXT:    s_endpgm
1070main_body:
1071  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
1072  ret void
1073}
1074
; Global fadd of 4.0 using syncscope("one-as") seq_cst, result unused.
; The expected instruction sequence on both targets is the same as for the
; variant with no syncscope: native global_atomic_add_f64 surrounded by the
; full L2 writeback/invalidate, with sc1 on the gfx940 atomic.
1075define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) #1 {
1076; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system:
1077; GFX90A:       ; %bb.0: ; %main_body
1078; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1079; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1080; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1081; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1082; GFX90A-NEXT:    buffer_wbl2
1083; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1084; GFX90A-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
1085; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1086; GFX90A-NEXT:    buffer_invl2
1087; GFX90A-NEXT:    buffer_wbinvl1_vol
1088; GFX90A-NEXT:    s_endpgm
1089;
1090; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_system:
1091; GFX940:       ; %bb.0: ; %main_body
1092; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1093; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1094; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1095; GFX940-NEXT:    buffer_wbl2 sc0 sc1
1096; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1097; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1] sc1
1098; GFX940-NEXT:    s_waitcnt vmcnt(0)
1099; GFX940-NEXT:    buffer_inv sc0 sc1
1100; GFX940-NEXT:    s_endpgm
1101main_body:
1102  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
1103  ret void
1104}
1105
; Agent-scope global fadd of 4.0 compiled under attribute group #0 instead of
; #1. The expected output is identical to the #1 agent-scope variant, i.e. the
; attribute difference is not expected to change instruction selection here.
; NOTE(review): #0 is defined outside this view; presumably it sets a
; denormal-flushing mode, going by the "_flush" suffix -- confirm.
1106define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) #0 {
1107; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush:
1108; GFX90A:       ; %bb.0: ; %main_body
1109; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1110; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1111; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1112; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1113; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1114; GFX90A-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
1115; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1116; GFX90A-NEXT:    buffer_wbinvl1_vol
1117; GFX90A-NEXT:    s_endpgm
1118;
1119; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_flush:
1120; GFX940:       ; %bb.0: ; %main_body
1121; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1122; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1123; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1124; GFX940-NEXT:    buffer_wbl2 sc1
1125; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1126; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
1127; GFX940-NEXT:    s_waitcnt vmcnt(0)
1128; GFX940-NEXT:    buffer_inv sc1
1129; GFX940-NEXT:    s_endpgm
1130main_body:
1131  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
1132  ret void
1133}
1134
; Returning form of the no-syncscope global fadd: a non-kernel function that
; returns the atomicrmw result. The checks expect the returning encoding
; (glc on gfx90a, sc0 sc1 on gfx940) plus the full L2 writeback/invalidate.
; The %data argument is not used by the body; the addend is the constant 4.0.
1135define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) #1 {
1136; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat:
1137; GFX90A:       ; %bb.0: ; %main_body
1138; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1139; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1140; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
1141; GFX90A-NEXT:    buffer_wbl2
1142; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
1143; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1144; GFX90A-NEXT:    buffer_invl2
1145; GFX90A-NEXT:    buffer_wbinvl1_vol
1146; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1147;
1148; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat:
1149; GFX940:       ; %bb.0: ; %main_body
1150; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
1152; GFX940-NEXT:    buffer_wbl2 sc0 sc1
1153; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
1154; GFX940-NEXT:    s_waitcnt vmcnt(0)
1155; GFX940-NEXT:    buffer_inv sc0 sc1
1156; GFX940-NEXT:    s_setpc_b64 s[30:31]
1157main_body:
1158  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
1159  ret double %ret
1160}
1161
; Returning global fadd of 4.0 at syncscope("agent"). The checks expect the
; returning encoding (glc on gfx90a, sc0 on gfx940) with only the agent-level
; cache maintenance: buffer_wbinvl1_vol on gfx90a, buffer_wbl2 sc1 and
; buffer_inv sc1 on gfx940. The %data argument is not used by the body.
1162define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) #1 {
1163; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
1164; GFX90A:       ; %bb.0: ; %main_body
1165; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1166; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1167; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
1168; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
1169; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1170; GFX90A-NEXT:    buffer_wbinvl1_vol
1171; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1172;
1173; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
1174; GFX940:       ; %bb.0: ; %main_body
1175; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1176; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
1177; GFX940-NEXT:    buffer_wbl2 sc1
1178; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0
1179; GFX940-NEXT:    s_waitcnt vmcnt(0)
1180; GFX940-NEXT:    buffer_inv sc1
1181; GFX940-NEXT:    s_setpc_b64 s[30:31]
1182main_body:
1183  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
1184  ret double %ret
1185}
1186
; Returning global fadd of 4.0 at syncscope("one-as"). The expected sequence on
; both targets matches the returning variant with no syncscope: returning
; encoding (glc / sc0 sc1) plus the full L2 writeback/invalidate.
; The %data argument is not used by the body.
1187define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) #1 {
1188; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system:
1189; GFX90A:       ; %bb.0: ; %main_body
1190; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1191; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1192; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
1193; GFX90A-NEXT:    buffer_wbl2
1194; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
1195; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1196; GFX90A-NEXT:    buffer_invl2
1197; GFX90A-NEXT:    buffer_wbinvl1_vol
1198; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1199;
1200; GFX940-LABEL: global_atomic_fadd_f64_rtn_pat_system:
1201; GFX940:       ; %bb.0: ; %main_body
1202; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
1204; GFX940-NEXT:    buffer_wbl2 sc0 sc1
1205; GFX940-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off sc0 sc1
1206; GFX940-NEXT:    s_waitcnt vmcnt(0)
1207; GFX940-NEXT:    buffer_inv sc0 sc1
1208; GFX940-NEXT:    s_setpc_b64 s[30:31]
1209main_body:
1210  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst, !amdgpu.no.fine.grained.memory !0
1211  ret double %ret
1212}
1213
; Agent-scope global fadd of 4.0 with neither an attribute group nor the
; !amdgpu.no.fine.grained.memory annotation. Here the two targets diverge:
; the gfx90a checks expect a compare-and-swap retry loop (v_add_f64 followed by
; global_atomic_cmpswap_x2 in the %atomicrmw.start block), while the gfx940
; checks still expect the native global_atomic_add_f64.
1214define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrspace(1) %ptr) {
1215; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
1216; GFX90A:       ; %bb.0: ; %main_body
1217; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1218; GFX90A-NEXT:    s_mov_b64 s[2:3], 0
1219; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1220; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1221; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
1222; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1223; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
1224; GFX90A-NEXT:  .LBB43_1: ; %atomicrmw.start
1225; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
1226; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
1227; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc
1228; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1229; GFX90A-NEXT:    buffer_wbinvl1_vol
1230; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
1231; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
1232; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1233; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
1234; GFX90A-NEXT:    s_cbranch_execnz .LBB43_1
1235; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
1236; GFX90A-NEXT:    s_endpgm
1237;
1238; GFX940-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
1239; GFX940:       ; %bb.0: ; %main_body
1240; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1241; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1242; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1243; GFX940-NEXT:    buffer_wbl2 sc1
1244; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1245; GFX940-NEXT:    global_atomic_add_f64 v2, v[0:1], s[0:1]
1246; GFX940-NEXT:    s_waitcnt vmcnt(0)
1247; GFX940-NEXT:    buffer_inv sc1
1248; GFX940-NEXT:    s_endpgm
1249main_body:
1250  %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst
1251  ret void
1252}
1253
; Flat (generic pointer) atomicrmw fadd of 4.0, seq_cst with no syncscope,
; result unused, tagged with both !noalias.addrspace and
; !amdgpu.no.fine.grained.memory. Both targets are expected to select a native
; flat_atomic_add_f64 with full L2 writeback/invalidate and a combined
; "s_waitcnt vmcnt(0) lgkmcnt(0)" after the atomic.
; NOTE(review): metadata !0 / !1 and attribute group #1 are defined outside this view.
1254define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 {
1255; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat:
1256; GFX90A:       ; %bb.0: ; %main_body
1257; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1258; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1259; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1260; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1261; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
1262; GFX90A-NEXT:    buffer_wbl2
1263; GFX90A-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
1264; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1265; GFX90A-NEXT:    buffer_invl2
1266; GFX90A-NEXT:    buffer_wbinvl1_vol
1267; GFX90A-NEXT:    s_endpgm
1268;
1269; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat:
1270; GFX940:       ; %bb.0: ; %main_body
1271; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1272; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1273; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1274; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
1275; GFX940-NEXT:    buffer_wbl2 sc0 sc1
1276; GFX940-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1] sc1
1277; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1278; GFX940-NEXT:    buffer_inv sc0 sc1
1279; GFX940-NEXT:    s_endpgm
1280main_body:
1281  %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1282  ret void
1283}
1284
; Flat fadd of 4.0 at syncscope("agent"), result unused. Compared with the
; no-syncscope flat variant, the checks drop the L2 writeback/invalidate on
; gfx90a (only buffer_wbinvl1_vol remains) and use buffer_wbl2 sc1 /
; buffer_inv sc1 on gfx940 with no sc1 bit on the atomic.
1285define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 {
1286; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
1287; GFX90A:       ; %bb.0: ; %main_body
1288; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1289; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1290; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1291; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1292; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
1293; GFX90A-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
1294; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1295; GFX90A-NEXT:    buffer_wbinvl1_vol
1296; GFX90A-NEXT:    s_endpgm
1297;
1298; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
1299; GFX940:       ; %bb.0: ; %main_body
1300; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1301; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1302; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1303; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
1304; GFX940-NEXT:    buffer_wbl2 sc1
1305; GFX940-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
1306; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1307; GFX940-NEXT:    buffer_inv sc1
1308; GFX940-NEXT:    s_endpgm
1309main_body:
1310  %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1311  ret void
1312}
1313
; Flat fadd of 4.0 at syncscope("one-as"), result unused. The expected code
; matches the no-syncscope flat variant except for the wait after the atomic:
; here the checks expect "s_waitcnt vmcnt(0)" only, without lgkmcnt(0).
1314define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 {
1315; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system:
1316; GFX90A:       ; %bb.0: ; %main_body
1317; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1318; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1319; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1320; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1321; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[0:1], s[0:1] op_sel:[0,1]
1322; GFX90A-NEXT:    buffer_wbl2
1323; GFX90A-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
1324; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1325; GFX90A-NEXT:    buffer_invl2
1326; GFX90A-NEXT:    buffer_wbinvl1_vol
1327; GFX90A-NEXT:    s_endpgm
1328;
1329; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_system:
1330; GFX940:       ; %bb.0: ; %main_body
1331; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1332; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1333; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1334; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
1335; GFX940-NEXT:    buffer_wbl2 sc0 sc1
1336; GFX940-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1] sc1
1337; GFX940-NEXT:    s_waitcnt vmcnt(0)
1338; GFX940-NEXT:    buffer_inv sc0 sc1
1339; GFX940-NEXT:    s_endpgm
1340main_body:
1341  %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1342  ret void
1343}
1344
; Non-kernel test: seq_cst atomicrmw fadd of the constant 4.0 through a flat
; pointer with no explicit syncscope, returning the value produced by the
; atomicrmw. The checks expect the returning form of flat_atomic_add_f64
; (destination v[0:1]; "glc" on gfx90a, "sc0 sc1" on gfx940) with an L2
; write-back before it and cache invalidates after it.
1345define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 {
1346; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat:
1347; GFX90A:       ; %bb.0: ; %main_body
1348; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1349; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1350; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
1351; GFX90A-NEXT:    buffer_wbl2
1352; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
1353; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1354; GFX90A-NEXT:    buffer_invl2
1355; GFX90A-NEXT:    buffer_wbinvl1_vol
1356; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1357;
1358; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat:
1359; GFX940:       ; %bb.0: ; %main_body
1360; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
1362; GFX940-NEXT:    buffer_wbl2 sc0 sc1
1363; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
1364; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1365; GFX940-NEXT:    buffer_inv sc0 sc1
1366; GFX940-NEXT:    s_setpc_b64 s[30:31]
1367main_body:
  ; %ret is the function's return value, so the atomic must produce a result.
1368  %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1369  ret double %ret
1370}
1371
; Non-kernel test: seq_cst atomicrmw fadd of the constant 4.0 through a flat
; pointer at syncscope("agent"), returning the value produced by the
; atomicrmw. The checks expect the returning form of flat_atomic_add_f64
; (destination v[0:1]; "glc" on gfx90a, "sc0" on gfx940). The gfx90a output
; has no buffer_wbl2 / buffer_invl2 around the atomic.
1372define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 {
1373; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
1374; GFX90A:       ; %bb.0: ; %main_body
1375; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1376; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1377; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
1378; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
1379; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1380; GFX90A-NEXT:    buffer_wbinvl1_vol
1381; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1382;
1383; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
1384; GFX940:       ; %bb.0: ; %main_body
1385; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1386; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
1387; GFX940-NEXT:    buffer_wbl2 sc1
1388; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0
1389; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1390; GFX940-NEXT:    buffer_inv sc1
1391; GFX940-NEXT:    s_setpc_b64 s[30:31]
1392main_body:
  ; %ret is the function's return value, so the atomic must produce a result.
1393  %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1394  ret double %ret
1395}
1396
; Non-kernel test: seq_cst atomicrmw fadd of the constant 4.0 through a flat
; pointer at syncscope("one-as"), returning the value produced by the
; atomicrmw. The checks expect the returning form of flat_atomic_add_f64, a
; vmcnt-only wait before the cache invalidates, and a separate lgkmcnt wait
; just before the return.
; NOTE(review): the function name says "system" while the IR uses
; syncscope("one-as"); presumably this is the system scope restricted to one
; address space in the AMDGPU memory model - confirm that is the intended scope.
1397define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 {
1398; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
1399; GFX90A:       ; %bb.0: ; %main_body
1400; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1401; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1402; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
1403; GFX90A-NEXT:    buffer_wbl2
1404; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
1405; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1406; GFX90A-NEXT:    buffer_invl2
1407; GFX90A-NEXT:    buffer_wbinvl1_vol
1408; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1409; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1410;
1411; GFX940-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
1412; GFX940:       ; %bb.0: ; %main_body
1413; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1414; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
1415; GFX940-NEXT:    buffer_wbl2 sc0 sc1
1416; GFX940-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] sc0 sc1
1417; GFX940-NEXT:    s_waitcnt vmcnt(0)
1418; GFX940-NEXT:    buffer_inv sc0 sc1
1419; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1420; GFX940-NEXT:    s_setpc_b64 s[30:31]
1421main_body:
  ; %ret is the function's return value, so the atomic must produce a result.
1422  %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
1423  ret double %ret
1424}
1425
; Kernel test: the same agent-scope seq_cst flat fadd of 4.0 as the other
; "noret_pat_agent" test, but this function has no attribute group and the
; atomicrmw lacks !amdgpu.no.fine.grained.memory. The expected output differs
; per target: the gfx90a checks expect an expansion into a load followed by a
; flat_atomic_cmpswap_x2 retry loop (blocks %atomicrmw.start /
; %atomicrmw.end), while the gfx940 checks still expect a single
; flat_atomic_add_f64.
1426define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) {
1427; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
1428; GFX90A:       ; %bb.0: ; %main_body
1429; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
1430; GFX90A-NEXT:    s_mov_b64 s[0:1], 0
1431; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1432; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
1433; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
1434; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], s[2:3], s[2:3] op_sel:[0,1]
1435; GFX90A-NEXT:  .LBB50_1: ; %atomicrmw.start
1436; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
1437; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1438; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
1439; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
1440; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1441; GFX90A-NEXT:    buffer_wbinvl1_vol
1442; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
1443; GFX90A-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1444; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
1445; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1446; GFX90A-NEXT:    s_cbranch_execnz .LBB50_1
1447; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
1448; GFX90A-NEXT:    s_endpgm
1449;
1450; GFX940-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
1451; GFX940:       ; %bb.0: ; %main_body
1452; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
1453; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1454; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1455; GFX940-NEXT:    v_mov_b64_e32 v[2:3], s[0:1]
1456; GFX940-NEXT:    buffer_wbl2 sc1
1457; GFX940-NEXT:    flat_atomic_add_f64 v[2:3], v[0:1]
1458; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
1459; GFX940-NEXT:    buffer_inv sc1
1460; GFX940-NEXT:    s_endpgm
1461main_body:
  ; Only !noalias.addrspace is attached here; %ret has no uses.
1462  %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst, !noalias.addrspace !1
1463  ret void
1464}
1465
; Kernel test: calls the llvm.amdgcn.ds.fadd.f64 intrinsic on an
; addrspace(3) pointer with a kernel-argument double and ignores the result.
; The checks for both RUN lines expect a single ds_add_f64 (the form without
; a destination register) followed by an lgkmcnt wait.
1466define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, double %data) {
1467; GFX90A-LABEL: local_atomic_fadd_f64_noret:
1468; GFX90A:       ; %bb.0: ; %main_body
1469; GFX90A-NEXT:    s_load_dword s2, s[4:5], 0x24
1470; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
1471; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1472; GFX90A-NEXT:    v_mov_b32_e32 v2, s2
1473; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1474; GFX90A-NEXT:    ds_add_f64 v2, v[0:1]
1475; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1476; GFX90A-NEXT:    s_endpgm
1477;
1478; GFX940-LABEL: local_atomic_fadd_f64_noret:
1479; GFX940:       ; %bb.0: ; %main_body
1480; GFX940-NEXT:    s_load_dword s2, s[4:5], 0x24
1481; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
1482; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1483; GFX940-NEXT:    v_mov_b32_e32 v2, s2
1484; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
1485; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
1486; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1487; GFX940-NEXT:    s_endpgm
1488main_body:
  ; %ret has no uses. NOTE(review): the trailing i32 0, i32 0, i1 0 operands
  ; are presumably ordering, scope and volatile flags - confirm against the
  ; intrinsic definition.
1489  %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
1490  ret void
1491}
1492
; Non-kernel test: calls the llvm.amdgcn.ds.fadd.f64 intrinsic on an
; addrspace(3) pointer and returns its result. The checks for both RUN lines
; expect ds_add_rtn_f64 writing v[0:1], preceded by two moves that shift the
; incoming %data registers (v1, v2) into the v[2:3] pair.
1493define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
1494; GFX90A-LABEL: local_atomic_fadd_f64_rtn:
1495; GFX90A:       ; %bb.0: ; %main_body
1496; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1497; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
1498; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
1499; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
1500; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1501; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1502;
1503; GFX940-LABEL: local_atomic_fadd_f64_rtn:
1504; GFX940:       ; %bb.0: ; %main_body
1505; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1506; GFX940-NEXT:    v_mov_b32_e32 v3, v2
1507; GFX940-NEXT:    v_mov_b32_e32 v2, v1
1508; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
1509; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1510; GFX940-NEXT:    s_setpc_b64 s[30:31]
1511main_body:
  ; The intrinsic result is returned, so the returning DS form is required.
1512  %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
1513  ret double %ret
1514}
1515
; Kernel test: seq_cst atomicrmw fadd of the constant 4.0 on an addrspace(3)
; pointer, result unused, using attribute group #1. The checks for both RUN
; lines expect a single ds_add_f64. gfx90a materializes 4.0 with two 32-bit
; moves (0 and 0x40100000) where gfx940 uses one v_mov_b64 of 4.0.
1516define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 {
1517; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
1518; GFX90A:       ; %bb.0: ; %main_body
1519; GFX90A-NEXT:    s_load_dword s0, s[4:5], 0x24
1520; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1521; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1522; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1523; GFX90A-NEXT:    v_mov_b32_e32 v2, s0
1524; GFX90A-NEXT:    ds_add_f64 v2, v[0:1]
1525; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1526; GFX90A-NEXT:    s_endpgm
1527;
1528; GFX940-LABEL: local_atomic_fadd_f64_noret_pat:
1529; GFX940:       ; %bb.0: ; %main_body
1530; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
1531; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1532; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1533; GFX940-NEXT:    v_mov_b32_e32 v2, s0
1534; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
1535; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1536; GFX940-NEXT:    s_endpgm
1537main_body:
  ; %ret has no uses in this function.
1538  %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
1539  ret void
1540}
1541
; Kernel test: the same seq_cst addrspace(3) fadd of 4.0 as
; local_atomic_fadd_f64_noret_pat, but under attribute group #0
; ("denormal-fp-math"="preserve-sign,preserve-sign"). The expected
; instruction sequence is the same as in that test: a single ds_add_f64 on
; both targets.
1542define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) #0 {
1543; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush:
1544; GFX90A:       ; %bb.0: ; %main_body
1545; GFX90A-NEXT:    s_load_dword s0, s[4:5], 0x24
1546; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1547; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1548; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1549; GFX90A-NEXT:    v_mov_b32_e32 v2, s0
1550; GFX90A-NEXT:    ds_add_f64 v2, v[0:1]
1551; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1552; GFX90A-NEXT:    s_endpgm
1553;
1554; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush:
1555; GFX940:       ; %bb.0: ; %main_body
1556; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
1557; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1558; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1559; GFX940-NEXT:    v_mov_b32_e32 v2, s0
1560; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
1561; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1562; GFX940-NEXT:    s_endpgm
1563main_body:
  ; %ret has no uses in this function.
1564  %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
1565  ret void
1566}
1567
; Kernel test: seq_cst addrspace(3) fadd of 4.0 under attribute group #4
; ("denormal-fp-math"="preserve-sign,preserve-sign"), with no metadata on the
; atomicrmw. The checks still expect a single ds_add_f64 on both targets,
; i.e. the same sequence as the variant that carries
; !amdgpu.no.fine.grained.memory.
1568define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 {
1569; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
1570; GFX90A:       ; %bb.0: ; %main_body
1571; GFX90A-NEXT:    s_load_dword s0, s[4:5], 0x24
1572; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
1573; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
1574; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1575; GFX90A-NEXT:    v_mov_b32_e32 v2, s0
1576; GFX90A-NEXT:    ds_add_f64 v2, v[0:1]
1577; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1578; GFX90A-NEXT:    s_endpgm
1579;
1580; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
1581; GFX940:       ; %bb.0: ; %main_body
1582; GFX940-NEXT:    s_load_dword s0, s[4:5], 0x24
1583; GFX940-NEXT:    v_mov_b64_e32 v[0:1], 4.0
1584; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1585; GFX940-NEXT:    v_mov_b32_e32 v2, s0
1586; GFX940-NEXT:    ds_add_f64 v2, v[0:1]
1587; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1588; GFX940-NEXT:    s_endpgm
1589main_body:
  ; No metadata is attached to this atomicrmw; %ret has no uses.
1590  %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
1591  ret void
1592}
1593
; Non-kernel test: seq_cst atomicrmw fadd of the constant 4.0 on an
; addrspace(3) pointer, returning the value produced by the atomicrmw. The
; checks for both RUN lines expect ds_add_rtn_f64 writing v[0:1].
; Note that the %data parameter is not referenced by the body; the addend is
; the literal 4.0.
1594define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) #1 {
1595; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat:
1596; GFX90A:       ; %bb.0: ; %main_body
1597; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1598; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1599; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
1600; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
1601; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1602; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1603;
1604; GFX940-LABEL: local_atomic_fadd_f64_rtn_pat:
1605; GFX940:       ; %bb.0: ; %main_body
1606; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1607; GFX940-NEXT:    v_mov_b64_e32 v[2:3], 4.0
1608; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
1609; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1610; GFX940-NEXT:    s_setpc_b64 s[30:31]
1611main_body:
  ; %ret is the function's return value, so the returning DS form is required.
1612  %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst, !amdgpu.no.fine.grained.memory !0
1613  ret double %ret
1614}
1615
; Non-kernel test: calls the llvm.amdgcn.ds.fadd.f64 intrinsic and returns
; its result, under attribute group #2 ("denormal-fp-math"="ieee,ieee"). The
; checks for both RUN lines expect ds_add_rtn_f64.
; NOTE(review): the "unsafe" in the name is not reflected by any attribute
; visible in this file; #2 only sets the denormal mode - confirm whether the
; name is historical.
1616define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 {
1617; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
1618; GFX90A:       ; %bb.0: ; %main_body
1619; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1620; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
1621; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
1622; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
1623; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1624; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1625;
1626; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
1627; GFX940:       ; %bb.0: ; %main_body
1628; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1629; GFX940-NEXT:    v_mov_b32_e32 v3, v2
1630; GFX940-NEXT:    v_mov_b32_e32 v2, v1
1631; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
1632; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1633; GFX940-NEXT:    s_setpc_b64 s[30:31]
1634main_body:
  ; The intrinsic result is returned, so the returning DS form is required.
1635  %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
1636  ret double %ret
1637}
1638
; Non-kernel test: calls the llvm.amdgcn.ds.fadd.f64 intrinsic and returns
; its result, under attribute group #3 ("denormal-fp-math"="ieee,ieee"). The
; IR body and the expected instruction sequence match
; local_atomic_fadd_f64_rtn_ieee_unsafe; only the attribute group number
; differs, and #2 and #3 have identical contents in this file.
1639define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 {
1640; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
1641; GFX90A:       ; %bb.0: ; %main_body
1642; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1643; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
1644; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
1645; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
1646; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
1647; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1648;
1649; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
1650; GFX940:       ; %bb.0: ; %main_body
1651; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1652; GFX940-NEXT:    v_mov_b32_e32 v3, v2
1653; GFX940-NEXT:    v_mov_b32_e32 v2, v1
1654; GFX940-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
1655; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
1656; GFX940-NEXT:    s_setpc_b64 s[30:31]
1657main_body:
  ; The intrinsic result is returned, so the returning DS form is required.
1658  %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
1659  ret double %ret
1660}
1661
; Attribute groups referenced by the test functions in this file.
; #0 and #4 have identical contents (preserve-sign denormal mode), as do
; #2 and #3 (ieee denormal mode); #1 only marks functions nounwind.
; NOTE(review): the duplicate groups look like leftovers from attributes that
; once distinguished the "safe" and "unsafe" variants - confirm before merging them.
1662attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
1663attributes #1 = { nounwind }
1664attributes #2 = { "denormal-fp-math"="ieee,ieee" }
1665attributes #3 = { "denormal-fp-math"="ieee,ieee" }
1666attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
1667
; Metadata nodes attached to the atomicrmw instructions in this file.
; !0 is the empty node used as the operand of !amdgpu.no.fine.grained.memory.
; !1 is the operand of !noalias.addrspace; the pair (5, 6) presumably denotes
; the half-open address-space range [5, 6), i.e. address space 5 only -
; confirm against the LangRef.
1668!0 = !{}
1669!1 = !{i32 5, i32 6}
1670