; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
; Not supported on gfx8 or gfx9, except gfx90a and gfx940
; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A %s
; xUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX940 %s

define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret double %ret
}

define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen offset:256 glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen offset:256 glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 256
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret double %ret
}

define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[16:19], s20 idxen glc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[16:19], s20 idxen glc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret double %ret
}

define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen glc slc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen glc slc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret double %ret
}

define void @struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define void @struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen offset:256
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen offset:256
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 256
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret void
}

; Natural mapping, no voffset
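; With a constant zero voffset only the index VGPR is needed, so the selected
; MUBUF instruction below uses idxen without the offen modifier.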
define void @struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[16:19], s20 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v2, s[16:19], s20 idxen
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

define void @struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen slc
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[2:3], s[16:19], s20 idxen offen slc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

; Test waterfall loop on resource
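; The resource descriptor here is in VGPRs, which MUBUF instructions cannot
; consume directly, so a waterfall loop is generated: v_readfirstlane_b32
; copies one lane's descriptor into SGPRs, v_cmp_eq_u64 plus s_and_b64 find
; every lane holding that same descriptor, s_and_saveexec_b64 restricts exec
; to those lanes, the atomic is issued, and s_xor_b64/s_cbranch_execnz repeat
; the loop until all lanes have been handled.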
define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b64 s[6:7], exec
; GFX6-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX6-NEXT:    v_readfirstlane_b32 s8, v2
; GFX6-NEXT:    v_readfirstlane_b32 s9, v3
; GFX6-NEXT:    v_readfirstlane_b32 s10, v4
; GFX6-NEXT:    v_readfirstlane_b32 s11, v5
; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[2:3]
; GFX6-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[10:11], v[4:5]
; GFX6-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
; GFX6-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[6:7], s[8:11], s16 idxen offen offset:256 glc
; GFX6-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX6-NEXT:    ; implicit-def: $vgpr6_vgpr7
; GFX6-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX6-NEXT:    s_cbranch_execnz .LBB8_1
; GFX6-NEXT:  ; %bb.2:
; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmax__sgpr_soffset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b64 s[6:7], exec
; GFX7-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT:    v_readfirstlane_b32 s8, v2
; GFX7-NEXT:    v_readfirstlane_b32 s9, v3
; GFX7-NEXT:    v_readfirstlane_b32 s10, v4
; GFX7-NEXT:    v_readfirstlane_b32 s11, v5
; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[2:3]
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[10:11], v[4:5]
; GFX7-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[6:7], s[8:11], s16 idxen offen offset:256 glc
; GFX7-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-NEXT:    ; implicit-def: $vgpr6_vgpr7
; GFX7-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX7-NEXT:    s_cbranch_execnz .LBB8_1
; GFX7-NEXT:  ; %bb.2:
; GFX7-NEXT:    s_mov_b64 exec, s[6:7]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 256
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret double %ret
}

; Test waterfall loop on soffset
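; Same waterfall structure as above, extended to a VGPR soffset: an extra
; v_readfirstlane_b32/v_cmp_eq_u32 pair folds the soffset equality check into
; the exec-mask condition before the atomic is issued.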
define double @struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__vgpr_soffset(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) {
; GFX6-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__vgpr_soffset:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b64 s[6:7], exec
; GFX6-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX6-NEXT:    v_readfirstlane_b32 s8, v2
; GFX6-NEXT:    v_readfirstlane_b32 s9, v3
; GFX6-NEXT:    v_readfirstlane_b32 s10, v4
; GFX6-NEXT:    v_readfirstlane_b32 s11, v5
; GFX6-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[2:3]
; GFX6-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[10:11], v[4:5]
; GFX6-NEXT:    v_readfirstlane_b32 s12, v8
; GFX6-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s12, v8
; GFX6-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX6-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[6:7], s[8:11], s12 idxen offen offset:256 glc
; GFX6-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX6-NEXT:    ; implicit-def: $vgpr8
; GFX6-NEXT:    ; implicit-def: $vgpr6_vgpr7
; GFX6-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX6-NEXT:    s_cbranch_execnz .LBB9_1
; GFX6-NEXT:  ; %bb.2:
; GFX6-NEXT:    s_mov_b64 exec, s[6:7]
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: struct_ptr_buffer_atomic_fmax_f64_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmax__vgpr_soffset:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b64 s[6:7], exec
; GFX7-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT:    v_readfirstlane_b32 s8, v2
; GFX7-NEXT:    v_readfirstlane_b32 s9, v3
; GFX7-NEXT:    v_readfirstlane_b32 s10, v4
; GFX7-NEXT:    v_readfirstlane_b32 s11, v5
; GFX7-NEXT:    v_cmp_eq_u64_e32 vcc, s[8:9], v[2:3]
; GFX7-NEXT:    v_cmp_eq_u64_e64 s[4:5], s[10:11], v[4:5]
; GFX7-NEXT:    v_readfirstlane_b32 s12, v8
; GFX7-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, s12, v8
; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], s[4:5]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    buffer_atomic_fmax_x2 v[0:1], v[6:7], s[8:11], s12 idxen offen offset:256 glc
; GFX7-NEXT:    ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5
; GFX7-NEXT:    ; implicit-def: $vgpr8
; GFX7-NEXT:    ; implicit-def: $vgpr6_vgpr7
; GFX7-NEXT:    s_xor_b64 exec, exec, s[4:5]
; GFX7-NEXT:    s_cbranch_execnz .LBB9_1
; GFX7-NEXT:  ; %bb.2:
; GFX7-NEXT:    s_mov_b64 exec, s[6:7]
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 256
  %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset.add, i32 %soffset, i32 0)
  ret double %ret
}

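; Operand order, as exercised by the tests above: the f64 value to combine,
; the buffer resource pointer (ptr addrspace(8)), the structure index, the
; extra VGPR byte offset, the SGPR byte offset, and an immediate cache-policy
; word (the i32 2 used in the _slc tests selects the slc modifier; glc is
; added by the backend whenever the atomic's return value is used).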
declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)