xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s
3;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
4
; Stores three <4 x float> arguments to the same buffer resource with constant
; zero offsets, varying only the final i32 operand of the intrinsic (0, 1, 2).
; The expected output on both targets is three buffer_store_dwordx4
; instructions: one without modifiers, one with glc, and one with slc.
5define amdgpu_ps void @buffer_store(ptr addrspace(8) inreg, <4 x float>, <4 x float>, <4 x float>) {
6; VERDE-LABEL: buffer_store:
7; VERDE:       ; %bb.0: ; %main_body
8; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
9; VERDE-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
10; VERDE-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
11; VERDE-NEXT:    s_endpgm
12;
13; CHECK-LABEL: buffer_store:
14; CHECK:       ; %bb.0: ; %main_body
15; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
16; CHECK-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
17; CHECK-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
18; CHECK-NEXT:    s_endpgm
19main_body:
20  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %1, ptr addrspace(8) %0, i32 0, i32 0, i32 0)
21  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %2, ptr addrspace(8) %0, i32 0, i32 0, i32 1)
22  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %3, ptr addrspace(8) %0, i32 0, i32 0, i32 2)
23  ret void
24}
25
; Stores a <4 x float> with the constant 42 as the first i32 offset operand.
; The expected output keeps the address operand as `off` and carries the
; constant in the instruction's immediate field (`offset:42`).
26define amdgpu_ps void @buffer_store_immoffs(ptr addrspace(8) inreg, <4 x float>) {
27; VERDE-LABEL: buffer_store_immoffs:
28; VERDE:       ; %bb.0: ; %main_body
29; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
30; VERDE-NEXT:    s_endpgm
31;
32; CHECK-LABEL: buffer_store_immoffs:
33; CHECK:       ; %bb.0: ; %main_body
34; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
35; CHECK-NEXT:    s_endpgm
36main_body:
37  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %1, ptr addrspace(8) %0, i32 42, i32 0, i32 0)
38  ret void
39}
40
; Stores a <4 x float> using the i32 function argument (%2) as the first
; offset operand. The expected output passes that value in a vector register
; (v4) and marks the store with `offen`.
41define amdgpu_ps void @buffer_store_ofs(ptr addrspace(8) inreg, <4 x float>, i32) {
42; VERDE-LABEL: buffer_store_ofs:
43; VERDE:       ; %bb.0: ; %main_body
44; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
45; VERDE-NEXT:    s_endpgm
46;
47; CHECK-LABEL: buffer_store_ofs:
48; CHECK:       ; %bb.0: ; %main_body
49; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
50; CHECK-NEXT:    s_endpgm
51main_body:
52  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %1, ptr addrspace(8) %0, i32 %2, i32 0, i32 0)
53  ret void
54}
55
56; Ideally, the register allocator would avoid the wait here
;
; Sequence under test: store %1, load a <4 x float> from the same resource,
; then store the loaded value. In the expected output the load writes v[0:3],
; the same registers the first store reads. The verde expectations contain an
; `s_waitcnt expcnt(0)` between that store and the load; the tonga
; expectations do not. Both expect `s_waitcnt vmcnt(0)` before the loaded data
; is stored.
57define amdgpu_ps void @buffer_store_wait(ptr addrspace(8) inreg, <4 x float>, i32, i32, i32) {
58; VERDE-LABEL: buffer_store_wait:
59; VERDE:       ; %bb.0: ; %main_body
60; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
61; VERDE-NEXT:    s_waitcnt expcnt(0)
62; VERDE-NEXT:    buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
63; VERDE-NEXT:    s_waitcnt vmcnt(0)
64; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
65; VERDE-NEXT:    s_endpgm
66;
67; CHECK-LABEL: buffer_store_wait:
68; CHECK:       ; %bb.0: ; %main_body
69; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
70; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
71; CHECK-NEXT:    s_waitcnt vmcnt(0)
72; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
73; CHECK-NEXT:    s_endpgm
74main_body:
75  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %1, ptr addrspace(8) %0, i32 %2, i32 0, i32 0)
76  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %3, i32 0, i32 0)
77  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %data, ptr addrspace(8) %0, i32 %4, i32 0, i32 0)
78  ret void
79}
80
; Stores a single float at a variable offset. The expected output is one
; buffer_store_dword with the offset in v1 (`offen`).
81define amdgpu_ps void @buffer_store_x1(ptr addrspace(8) inreg %rsrc, float %data, i32 %offset) {
82; VERDE-LABEL: buffer_store_x1:
83; VERDE:       ; %bb.0: ; %main_body
84; VERDE-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
85; VERDE-NEXT:    s_endpgm
86;
87; CHECK-LABEL: buffer_store_x1:
88; CHECK:       ; %bb.0: ; %main_body
89; CHECK-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
90; CHECK-NEXT:    s_endpgm
91main_body:
92  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
93  ret void
94}
95
; Stores a <2 x float> at a variable offset. The expected output is one
; buffer_store_dwordx2 with the offset in v2 (`offen`).
96define amdgpu_ps void @buffer_store_x2(ptr addrspace(8) inreg %rsrc, <2 x float> %data, i32 %offset) #0 {
97; VERDE-LABEL: buffer_store_x2:
98; VERDE:       ; %bb.0: ; %main_body
99; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
100; VERDE-NEXT:    s_endpgm
101;
102; CHECK-LABEL: buffer_store_x2:
103; CHECK:       ; %bb.0: ; %main_body
104; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
105; CHECK-NEXT:    s_endpgm
106main_body:
107  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
108  ret void
109}
110
; Six single-float stores at %a + {4, 8, 12, 16, 28, 32}. The expected output
; combines the four stores at 4..16 into one buffer_store_dwordx4 at offset:4
; and the two at 28..32 into one buffer_store_dwordx2 at offset:28, both using
; the unmodified %a (v0) as the `offen` register.
111define amdgpu_ps void @buffer_store_x1_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
112; VERDE-LABEL: buffer_store_x1_offen_merged_and:
113; VERDE:       ; %bb.0:
114; VERDE-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
115; VERDE-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
116; VERDE-NEXT:    s_endpgm
117;
118; CHECK-LABEL: buffer_store_x1_offen_merged_and:
119; CHECK:       ; %bb.0:
120; CHECK-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
121; CHECK-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
122; CHECK-NEXT:    s_endpgm
123  %a1 = add i32 %a, 4
124  %a2 = add i32 %a, 8
125  %a3 = add i32 %a, 12
126  %a4 = add i32 %a, 16
127  %a5 = add i32 %a, 28
128  %a6 = add i32 %a, 32
129  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
130  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
131  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
132  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
133  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
134  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
135  ret void
136}
137
; Same six-store pattern as the `_and` variant, but the base offset is
; %inp << 6 instead of a plain argument. The expected output is the shift
; (v_lshlrev_b32 by 6) followed by the same merged dwordx4 / dwordx2 pair.
; NOTE(review): despite the `_or` suffix, the per-store offsets here are
; formed with `add`, not `or` -- confirm this is intended.
138define amdgpu_ps void @buffer_store_x1_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
139; VERDE-LABEL: buffer_store_x1_offen_merged_or:
140; VERDE:       ; %bb.0:
141; VERDE-NEXT:    v_lshlrev_b32_e32 v0, 6, v0
142; VERDE-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
143; VERDE-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
144; VERDE-NEXT:    s_endpgm
145;
146; CHECK-LABEL: buffer_store_x1_offen_merged_or:
147; CHECK:       ; %bb.0:
148; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 6, v0
149; CHECK-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
150; CHECK-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
151; CHECK-NEXT:    s_endpgm
152  %a = shl i32 %inp, 6
153  %a1 = add i32 %a, 4
154  %a2 = add i32 %a, 8
155  %a3 = add i32 %a, 12
156  %a4 = add i32 %a, 16
157  %a5 = add i32 %a, 28
158  %a6 = add i32 %a, 32
159  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
160  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
161  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
162  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
163  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
164  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
165  ret void
166}
167
168
; Six single-float stores at %a + {4, 8, 12, 16, 28, 32}, issued in three
; pairs whose final i32 operand is 0, 1 and 3 respectively. The expected
; output merges each pair into its own buffer_store_dwordx2 (no modifier,
; `glc`, and `glc slc`); the stores at 4..16 are not combined into a single
; dwordx4 because the two pairs carry different final operands.
169define amdgpu_ps void @buffer_store_x1_offen_merged_glc_slc(ptr addrspace(8) inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
170; VERDE-LABEL: buffer_store_x1_offen_merged_glc_slc:
171; VERDE:       ; %bb.0:
172; VERDE-NEXT:    buffer_store_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
173; VERDE-NEXT:    buffer_store_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
174; VERDE-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
175; VERDE-NEXT:    s_endpgm
176;
177; CHECK-LABEL: buffer_store_x1_offen_merged_glc_slc:
178; CHECK:       ; %bb.0:
179; CHECK-NEXT:    buffer_store_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
180; CHECK-NEXT:    buffer_store_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
181; CHECK-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
182; CHECK-NEXT:    s_endpgm
183  %a1 = add i32 %a, 4
184  %a2 = add i32 %a, 8
185  %a3 = add i32 %a, 12
186  %a4 = add i32 %a, 16
187  %a5 = add i32 %a, 28
188  %a6 = add i32 %a, 32
189  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
190  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
191  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 1)
192  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 1)
193  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 3)
194  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 3)
195  ret void
196}
197
; Two <2 x float> stores at %a + 4 and %a + 12. The expected output combines
; them into one buffer_store_dwordx4 at offset:4 with %a (v0) as the `offen`
; register.
198define amdgpu_ps void @buffer_store_x2_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
199; VERDE-LABEL: buffer_store_x2_offen_merged_and:
200; VERDE:       ; %bb.0:
201; VERDE-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
202; VERDE-NEXT:    s_endpgm
203;
204; CHECK-LABEL: buffer_store_x2_offen_merged_and:
205; CHECK:       ; %bb.0:
206; CHECK-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
207; CHECK-NEXT:    s_endpgm
208  %a1 = add i32 %a, 4
209  %a2 = add i32 %a, 12
210  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
211  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
212  ret void
213}
214
; Two <2 x float> stores at (%inp << 4) + 4 and (%inp << 4) + 12. The expected
; output is the shift (v_lshlrev_b32 by 4) followed by a single merged
; buffer_store_dwordx4 at offset:4.
; NOTE(review): despite the `_or` suffix, the per-store offsets here are
; formed with `add`, not `or` -- confirm this is intended.
215define amdgpu_ps void @buffer_store_x2_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp, <2 x float> %v1, <2 x float> %v2) {
216; VERDE-LABEL: buffer_store_x2_offen_merged_or:
217; VERDE:       ; %bb.0:
218; VERDE-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
219; VERDE-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
220; VERDE-NEXT:    s_endpgm
221;
222; CHECK-LABEL: buffer_store_x2_offen_merged_or:
223; CHECK:       ; %bb.0:
224; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
225; CHECK-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
226; CHECK-NEXT:    s_endpgm
227  %a = shl i32 %inp, 4
228  %a1 = add i32 %a, 4
229  %a2 = add i32 %a, 12
230  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
231  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
232  ret void
233}
234
; Six single-float stores at the constant offsets 4, 8, 12, 16, 28 and 32.
; The expected output combines them into one buffer_store_dwordx4 at offset:4
; and one buffer_store_dwordx2 at offset:28, both with `off` as the address
; operand.
235define amdgpu_ps void @buffer_store_x1_offset_merged(ptr addrspace(8) inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
236; VERDE-LABEL: buffer_store_x1_offset_merged:
237; VERDE:       ; %bb.0:
238; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
239; VERDE-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
240; VERDE-NEXT:    s_endpgm
241;
242; CHECK-LABEL: buffer_store_x1_offset_merged:
243; CHECK:       ; %bb.0:
244; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
245; CHECK-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
246; CHECK-NEXT:    s_endpgm
247  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
248  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
249  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
250  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
251  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
252  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
253  ret void
254}
255
; Two <2 x float> stores at the constant offsets 4 and 12. The expected output
; combines them into one buffer_store_dwordx4 at offset:4.
256define amdgpu_ps void @buffer_store_x2_offset_merged(ptr addrspace(8) inreg %rsrc, <2 x float> %v1,<2 x float> %v2) {
257; VERDE-LABEL: buffer_store_x2_offset_merged:
258; VERDE:       ; %bb.0:
259; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
260; VERDE-NEXT:    s_endpgm
261;
262; CHECK-LABEL: buffer_store_x2_offset_merged:
263; CHECK:       ; %bb.0:
264; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
265; CHECK-NEXT:    s_endpgm
266  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v1, ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
267  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v2, ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
268  ret void
269}
270
; Integer counterpart of the basic store test: stores a <4 x i32>, a
; <2 x i32> and an i32 with the final operand set to 0, 1 and 2. The expected
; output is buffer_store_dwordx4, buffer_store_dwordx2 with `glc`, and
; buffer_store_dword with `slc`.
271define amdgpu_ps void @buffer_store_int(ptr addrspace(8) inreg, <4 x i32>, <2 x i32>, i32) {
272; VERDE-LABEL: buffer_store_int:
273; VERDE:       ; %bb.0: ; %main_body
274; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
275; VERDE-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
276; VERDE-NEXT:    buffer_store_dword v6, off, s[0:3], 0 slc
277; VERDE-NEXT:    s_endpgm
278;
279; CHECK-LABEL: buffer_store_int:
280; CHECK:       ; %bb.0: ; %main_body
281; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
282; CHECK-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
283; CHECK-NEXT:    buffer_store_dword v6, off, s[0:3], 0 slc
284; CHECK-NEXT:    s_endpgm
285main_body:
286  call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %1, ptr addrspace(8) %0, i32 0, i32 0, i32 0)
287  call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %2, ptr addrspace(8) %0, i32 0, i32 0, i32 1)
288  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %3, ptr addrspace(8) %0, i32 0, i32 0, i32 2)
289  ret void
290}
291
; Converts the float argument to an unsigned i32, truncates it to i8 and
; stores the i8. The expected output is the conversion (v_cvt_u32_f32)
; followed by buffer_store_byte; no separate instruction is expected for the
; truncation.
292define amdgpu_ps void @raw_ptr_buffer_store_byte(ptr addrspace(8) inreg %rsrc, float %v1) {
293; VERDE-LABEL: raw_ptr_buffer_store_byte:
294; VERDE:       ; %bb.0: ; %main_body
295; VERDE-NEXT:    v_cvt_u32_f32_e32 v0, v0
296; VERDE-NEXT:    buffer_store_byte v0, off, s[0:3], 0
297; VERDE-NEXT:    s_endpgm
298;
299; CHECK-LABEL: raw_ptr_buffer_store_byte:
300; CHECK:       ; %bb.0: ; %main_body
301; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
302; CHECK-NEXT:    buffer_store_byte v0, off, s[0:3], 0
303; CHECK-NEXT:    s_endpgm
304main_body:
305  %v2 = fptoui float %v1 to i32
306  %v3 = trunc i32 %v2 to i8
307  call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %v3, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
308  ret void
309}
310
; Converts the float argument to an unsigned i32, truncates it to i16 and
; stores the i16. The expected output is the conversion (v_cvt_u32_f32)
; followed by buffer_store_short; no separate instruction is expected for the
; truncation.
311define amdgpu_ps void @raw_ptr_buffer_store_short(ptr addrspace(8) inreg %rsrc, float %v1) {
312; VERDE-LABEL: raw_ptr_buffer_store_short:
313; VERDE:       ; %bb.0: ; %main_body
314; VERDE-NEXT:    v_cvt_u32_f32_e32 v0, v0
315; VERDE-NEXT:    buffer_store_short v0, off, s[0:3], 0
316; VERDE-NEXT:    s_endpgm
317;
318; CHECK-LABEL: raw_ptr_buffer_store_short:
319; CHECK:       ; %bb.0: ; %main_body
320; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
321; CHECK-NEXT:    buffer_store_short v0, off, s[0:3], 0
322; CHECK-NEXT:    s_endpgm
323main_body:
324  %v2 = fptoui float %v1 to i32
325  %v3 = trunc i32 %v2 to i16
326  call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %v3, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
327  ret void
328}
329
; Truncates the i32 argument to i16, bitcasts it to half and stores the half.
; The expected output on both targets is a lone buffer_store_short of v0: the
; trunc and bitcast produce no instructions.
330define amdgpu_ps void @raw_ptr_buffer_store_f16(ptr addrspace(8) inreg %rsrc, i32 %v1) {
331; VERDE-LABEL: raw_ptr_buffer_store_f16:
332; VERDE:       ; %bb.0: ; %main_body
333; VERDE-NEXT:    buffer_store_short v0, off, s[0:3], 0
334; VERDE-NEXT:    s_endpgm
335;
336; CHECK-LABEL: raw_ptr_buffer_store_f16:
337; CHECK:       ; %bb.0: ; %main_body
338; CHECK-NEXT:    buffer_store_short v0, off, s[0:3], 0
339; CHECK-NEXT:    s_endpgm
340main_body:
341  %trunc = trunc i32 %v1 to i16
342  %cast = bitcast i16 %trunc to half
343  call void @llvm.amdgcn.raw.ptr.buffer.store.f16(half %cast, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
344  ret void
345}
346
; Stores a <2 x half> argument at a variable offset. The two targets differ:
; the verde expectations convert each element with v_cvt_f16_f32 and pack the
; pair with a shift and an or before a buffer_store_dword (offset in v2),
; while the tonga expectations store v0 directly (offset in v1).
347define amdgpu_ps void @buffer_store_v2f16(ptr addrspace(8) inreg %rsrc, <2 x half> %data, i32 %offset) {
348; VERDE-LABEL: buffer_store_v2f16:
349; VERDE:       ; %bb.0: ; %main_body
350; VERDE-NEXT:    v_cvt_f16_f32_e32 v1, v1
351; VERDE-NEXT:    v_cvt_f16_f32_e32 v0, v0
352; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
353; VERDE-NEXT:    v_or_b32_e32 v0, v0, v1
354; VERDE-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
355; VERDE-NEXT:    s_endpgm
356;
357; CHECK-LABEL: buffer_store_v2f16:
358; CHECK:       ; %bb.0: ; %main_body
359; CHECK-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
360; CHECK-NEXT:    s_endpgm
361main_body:
362  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
363  ret void
364}
365
; Stores a <4 x half> argument at a variable offset. The verde expectations
; convert all four elements with v_cvt_f16_f32 and pack them pairwise (shift +
; or) into v[0:1] before a buffer_store_dwordx2 (offset in v4); the tonga
; expectations store v[0:1] directly (offset in v2).
366define amdgpu_ps void @buffer_store_v4f16(ptr addrspace(8) inreg %rsrc, <4 x half> %data, i32 %offset) #0 {
367; VERDE-LABEL: buffer_store_v4f16:
368; VERDE:       ; %bb.0: ; %main_body
369; VERDE-NEXT:    v_cvt_f16_f32_e32 v3, v3
370; VERDE-NEXT:    v_cvt_f16_f32_e32 v2, v2
371; VERDE-NEXT:    v_cvt_f16_f32_e32 v5, v1
372; VERDE-NEXT:    v_cvt_f16_f32_e32 v0, v0
373; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
374; VERDE-NEXT:    v_or_b32_e32 v1, v2, v1
375; VERDE-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
376; VERDE-NEXT:    v_or_b32_e32 v0, v0, v2
377; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v4, s[0:3], 0 offen
378; VERDE-NEXT:    s_endpgm
379;
380; CHECK-LABEL: buffer_store_v4f16:
381; CHECK:       ; %bb.0: ; %main_body
382; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
383; CHECK-NEXT:    s_endpgm
384main_body:
385  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
386  ret void
387}
388
; Stores an <8 x half> argument at a variable offset. The verde expectations
; convert all eight elements with v_cvt_f16_f32 and pack them pairwise (shift
; + or) into v[2:5] before a buffer_store_dwordx4 (offset in v8); the tonga
; expectations store v[0:3] directly (offset in v4).
389define amdgpu_ps void @buffer_store_v8f16(ptr addrspace(8) inreg %rsrc, <8 x half> %data, i32 %offset) #0 {
390; VERDE-LABEL: buffer_store_v8f16:
391; VERDE:       ; %bb.0: ; %main_body
392; VERDE-NEXT:    v_cvt_f16_f32_e32 v7, v7
393; VERDE-NEXT:    v_cvt_f16_f32_e32 v6, v6
394; VERDE-NEXT:    v_cvt_f16_f32_e32 v9, v5
395; VERDE-NEXT:    v_cvt_f16_f32_e32 v3, v3
396; VERDE-NEXT:    v_cvt_f16_f32_e32 v1, v1
397; VERDE-NEXT:    v_cvt_f16_f32_e32 v4, v4
398; VERDE-NEXT:    v_cvt_f16_f32_e32 v2, v2
399; VERDE-NEXT:    v_cvt_f16_f32_e32 v0, v0
400; VERDE-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
401; VERDE-NEXT:    v_or_b32_e32 v5, v6, v5
402; VERDE-NEXT:    v_lshlrev_b32_e32 v6, 16, v9
403; VERDE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
404; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
405; VERDE-NEXT:    v_or_b32_e32 v4, v4, v6
406; VERDE-NEXT:    v_or_b32_e32 v3, v2, v3
407; VERDE-NEXT:    v_or_b32_e32 v2, v0, v1
408; VERDE-NEXT:    buffer_store_dwordx4 v[2:5], v8, s[0:3], 0 offen
409; VERDE-NEXT:    s_endpgm
410;
411; CHECK-LABEL: buffer_store_v8f16:
412; CHECK:       ; %bb.0: ; %main_body
413; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
414; CHECK-NEXT:    s_endpgm
415main_body:
416  call void @llvm.amdgcn.raw.ptr.buffer.store.v8f16(<8 x half> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
417  ret void
418}
419
; Stores a <2 x bfloat> argument at a variable offset. The verde expectations
; multiply each element by 1.0 (v_mul_f32), shift the high element right by
; 16, and pack the pair with v_alignbit_b32 before a buffer_store_dword
; (offset in v2); the tonga expectations store v0 directly (offset in v1).
420define amdgpu_ps void @buffer_store_v2bf16(ptr addrspace(8) inreg %rsrc, <2 x bfloat> %data, i32 %offset) {
421; VERDE-LABEL: buffer_store_v2bf16:
422; VERDE:       ; %bb.0:
423; VERDE-NEXT:    v_mul_f32_e32 v1, 1.0, v1
424; VERDE-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
425; VERDE-NEXT:    v_mul_f32_e32 v0, 1.0, v0
426; VERDE-NEXT:    v_alignbit_b32 v0, v1, v0, 16
427; VERDE-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
428; VERDE-NEXT:    s_endpgm
429;
430; CHECK-LABEL: buffer_store_v2bf16:
431; CHECK:       ; %bb.0:
432; CHECK-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
433; CHECK-NEXT:    s_endpgm
434  call void @llvm.amdgcn.raw.ptr.buffer.store.v2bf16(<2 x bfloat> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
435  ret void
436}
437
; Stores a <4 x bfloat> argument at a variable offset. The verde expectations
; apply the same per-element v_mul_f32 by 1.0 / shift / v_alignbit_b32 packing
; to both pairs, producing v[1:2] for a buffer_store_dwordx2 (offset in v4);
; the tonga expectations store v[0:1] directly (offset in v2).
438define amdgpu_ps void @buffer_store_v4bf16(ptr addrspace(8) inreg %rsrc, <4 x bfloat> %data, i32 %offset) #0 {
439; VERDE-LABEL: buffer_store_v4bf16:
440; VERDE:       ; %bb.0:
441; VERDE-NEXT:    v_mul_f32_e32 v3, 1.0, v3
442; VERDE-NEXT:    v_mul_f32_e32 v1, 1.0, v1
443; VERDE-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
444; VERDE-NEXT:    v_mul_f32_e32 v2, 1.0, v2
445; VERDE-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
446; VERDE-NEXT:    v_mul_f32_e32 v0, 1.0, v0
447; VERDE-NEXT:    v_alignbit_b32 v2, v3, v2, 16
448; VERDE-NEXT:    v_alignbit_b32 v1, v1, v0, 16
449; VERDE-NEXT:    buffer_store_dwordx2 v[1:2], v4, s[0:3], 0 offen
450; VERDE-NEXT:    s_endpgm
451;
452; CHECK-LABEL: buffer_store_v4bf16:
453; CHECK:       ; %bb.0:
454; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
455; CHECK-NEXT:    s_endpgm
456  call void @llvm.amdgcn.raw.ptr.buffer.store.v4bf16(<4 x bfloat> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
457  ret void
458}
459
; Truncates the i32 argument to i16 and stores it. The expected output on both
; targets is a lone buffer_store_short of v0; the truncation produces no
; instruction.
460define amdgpu_ps void @raw_ptr_buffer_store_i16(ptr addrspace(8) inreg %rsrc, i32 %v1) {
461; VERDE-LABEL: raw_ptr_buffer_store_i16:
462; VERDE:       ; %bb.0: ; %main_body
463; VERDE-NEXT:    buffer_store_short v0, off, s[0:3], 0
464; VERDE-NEXT:    s_endpgm
465;
466; CHECK-LABEL: raw_ptr_buffer_store_i16:
467; CHECK:       ; %bb.0: ; %main_body
468; CHECK-NEXT:    buffer_store_short v0, off, s[0:3], 0
469; CHECK-NEXT:    s_endpgm
470main_body:
471  %trunc = trunc i32 %v1 to i16
472  call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %trunc, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
473  ret void
474}
475
; Stores a <2 x i16> argument at a variable offset. The verde expectations
; pack the two elements into one dword (shift the high element left by 16,
; mask the low element with 0xffff, or them together) before a
; buffer_store_dword (offset in v2); the tonga expectations store v0 directly
; (offset in v1).
476define amdgpu_ps void @buffer_store_v2i16(ptr addrspace(8) inreg %rsrc, <2 x i16> %data, i32 %offset) {
477; VERDE-LABEL: buffer_store_v2i16:
478; VERDE:       ; %bb.0: ; %main_body
479; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
480; VERDE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
481; VERDE-NEXT:    v_or_b32_e32 v0, v0, v1
482; VERDE-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
483; VERDE-NEXT:    s_endpgm
484;
485; CHECK-LABEL: buffer_store_v2i16:
486; CHECK:       ; %bb.0: ; %main_body
487; CHECK-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
488; CHECK-NEXT:    s_endpgm
489main_body:
490  call void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
491  ret void
492}
493
; Stores a <4 x i16> argument at a variable offset. The verde expectations
; pack the elements pairwise (shift / mask with 0xffff / or) into v[1:2]
; before a buffer_store_dwordx2 (offset in v4); the tonga expectations store
; v[0:1] directly (offset in v2).
494define amdgpu_ps void @buffer_store_v4i16(ptr addrspace(8) inreg %rsrc, <4 x i16> %data, i32 %offset) #0 {
495; VERDE-LABEL: buffer_store_v4i16:
496; VERDE:       ; %bb.0: ; %main_body
497; VERDE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
498; VERDE-NEXT:    v_and_b32_e32 v2, 0xffff, v2
499; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
500; VERDE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
501; VERDE-NEXT:    v_or_b32_e32 v2, v2, v3
502; VERDE-NEXT:    v_or_b32_e32 v1, v0, v1
503; VERDE-NEXT:    buffer_store_dwordx2 v[1:2], v4, s[0:3], 0 offen
504; VERDE-NEXT:    s_endpgm
505;
506; CHECK-LABEL: buffer_store_v4i16:
507; CHECK:       ; %bb.0: ; %main_body
508; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
509; CHECK-NEXT:    s_endpgm
510main_body:
511  call void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
512  ret void
513}
514
515; FIXME: The <6 x i16> variant below is disabled (commented out).
516; define amdgpu_ps void @buffer_store_v6i16(ptr addrspace(8) inreg %rsrc, <6 x i16> %data, i32 %offset) #0 {
517; main_body:
518;   call void @llvm.amdgcn.raw.ptr.buffer.store.v6i16(<6 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
519;   ret void
520; }
; Stores an <8 x i16> argument at a variable offset. The verde expectations
; pack the elements pairwise (shift / mask with 0xffff / or) into v[3:6]
; before a buffer_store_dwordx4 (offset in v8); the tonga expectations store
; v[0:3] directly (offset in v4).
521define amdgpu_ps void @buffer_store_v8i16(ptr addrspace(8) inreg %rsrc, <8 x i16> %data, i32 %offset) #0 {
522; VERDE-LABEL: buffer_store_v8i16:
523; VERDE:       ; %bb.0: ; %main_body
524; VERDE-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
525; VERDE-NEXT:    v_and_b32_e32 v6, 0xffff, v6
526; VERDE-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
527; VERDE-NEXT:    v_and_b32_e32 v4, 0xffff, v4
528; VERDE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
529; VERDE-NEXT:    v_and_b32_e32 v2, 0xffff, v2
530; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
531; VERDE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
532; VERDE-NEXT:    v_or_b32_e32 v6, v6, v7
533; VERDE-NEXT:    v_or_b32_e32 v5, v4, v5
534; VERDE-NEXT:    v_or_b32_e32 v4, v2, v3
535; VERDE-NEXT:    v_or_b32_e32 v3, v0, v1
536; VERDE-NEXT:    buffer_store_dwordx4 v[3:6], v8, s[0:3], 0 offen
537; VERDE-NEXT:    s_endpgm
538;
539; CHECK-LABEL: buffer_store_v8i16:
540; CHECK:       ; %bb.0: ; %main_body
541; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
542; CHECK-NEXT:    s_endpgm
543main_body:
544  call void @llvm.amdgcn.raw.ptr.buffer.store.v8i16(<8 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
545  ret void
546}
547
; Six single-float stores at the constant offsets 4, 8, 12, 16, 28 and 32 with
; a final operand of 0. The expected output combines them into one
; buffer_store_dwordx4 at offset:4 and one buffer_store_dwordx2 at offset:28.
; The IR body is the same as @buffer_store_x1_offset_merged; it serves as the
; merged baseline for the swizzled variant that follows in this file.
548define amdgpu_ps void @raw_ptr_buffer_store_x1_offset_merged(ptr addrspace(8) inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
549; VERDE-LABEL: raw_ptr_buffer_store_x1_offset_merged:
550; VERDE:       ; %bb.0:
551; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
552; VERDE-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
553; VERDE-NEXT:    s_endpgm
554;
555; CHECK-LABEL: raw_ptr_buffer_store_x1_offset_merged:
556; CHECK:       ; %bb.0:
557; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
558; CHECK-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
559; CHECK-NEXT:    s_endpgm
560  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
561  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
562  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
563  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
564  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
565  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
566  ret void
567}
568
; Same six constant-offset float stores as
; @raw_ptr_buffer_store_x1_offset_merged, but with the final operand set to 8
; on every call. The expected output keeps all six as separate
; buffer_store_dword instructions (offsets 4, 8, 12, 16, 28, 32): with this
; operand value the stores must not be merged, and no extra modifier is
; printed on the instructions.
569define amdgpu_ps void @raw_ptr_buffer_store_x1_offset_swizzled_not_merged(ptr addrspace(8) inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
570; VERDE-LABEL: raw_ptr_buffer_store_x1_offset_swizzled_not_merged:
571; VERDE:       ; %bb.0:
572; VERDE-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
573; VERDE-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:8
574; VERDE-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:12
575; VERDE-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:16
576; VERDE-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:28
577; VERDE-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:32
578; VERDE-NEXT:    s_endpgm
579;
580; CHECK-LABEL: raw_ptr_buffer_store_x1_offset_swizzled_not_merged:
581; CHECK:       ; %bb.0:
582; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
583; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:8
584; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:12
585; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:16
586; CHECK-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:28
587; CHECK-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:32
588; CHECK-NEXT:    s_endpgm
589  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 4, i32 0, i32 8)
590  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 8, i32 0, i32 8)
591  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 12, i32 0, i32 8)
592  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 16, i32 0, i32 8)
593  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 28, i32 0, i32 8)
594  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 32, i32 0, i32 8)
595  ret void
596}
597
; Stores a double at %voffset + 60 from a plain `define void` function (not
; amdgpu_ps). The expected output folds the +60 into the instruction's
; immediate (`offen offset:60`), takes the resource from s[16:19], and returns
; with s_setpc_b64 after a trailing s_waitcnt; on verde that trailing wait
; also includes expcnt(0).
598define void @buffer_store_f64__voffset_add(ptr addrspace(8) inreg %rsrc, double %data, i32 %voffset) #0 {
599; VERDE-LABEL: buffer_store_f64__voffset_add:
600; VERDE:       ; %bb.0:
601; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
602; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
603; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
604; VERDE-NEXT:    s_setpc_b64 s[30:31]
605;
606; CHECK-LABEL: buffer_store_f64__voffset_add:
607; CHECK:       ; %bb.0:
608; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
610; CHECK-NEXT:    s_waitcnt vmcnt(0)
611; CHECK-NEXT:    s_setpc_b64 s[30:31]
612  %voffset.add = add i32 %voffset, 60
613  call void @llvm.amdgcn.raw.ptr.buffer.store.f64(double %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
614  ret void
615}
616
; Stores a <2 x double> at %voffset + 60 from a plain `define void` function.
; The expected output is a buffer_store_dwordx4 of v[0:3] with the +60 folded
; into `offset:60`, followed by a trailing s_waitcnt and s_setpc_b64; on verde
; the trailing wait also includes expcnt(0).
617define void @buffer_store_v2f64__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x double> %data, i32 %voffset) #0 {
618; VERDE-LABEL: buffer_store_v2f64__voffset_add:
619; VERDE:       ; %bb.0:
620; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
622; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
623; VERDE-NEXT:    s_setpc_b64 s[30:31]
624;
625; CHECK-LABEL: buffer_store_v2f64__voffset_add:
626; CHECK:       ; %bb.0:
627; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
629; CHECK-NEXT:    s_waitcnt vmcnt(0)
630; CHECK-NEXT:    s_setpc_b64 s[30:31]
631  %voffset.add = add i32 %voffset, 60
632  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f64(<2 x double> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
633  ret void
634}
635
; Stores an i64 at %voffset + 60 from a plain `define void` function. The
; expected output is a buffer_store_dwordx2 of v[0:1] with the +60 folded into
; `offset:60`, followed by a trailing s_waitcnt and s_setpc_b64; on verde the
; trailing wait also includes expcnt(0).
636define void @buffer_store_i64__voffset_add(ptr addrspace(8) inreg %rsrc, i64 %data, i32 %voffset) #0 {
637; VERDE-LABEL: buffer_store_i64__voffset_add:
638; VERDE:       ; %bb.0:
639; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
641; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
642; VERDE-NEXT:    s_setpc_b64 s[30:31]
643;
644; CHECK-LABEL: buffer_store_i64__voffset_add:
645; CHECK:       ; %bb.0:
646; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
647; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
648; CHECK-NEXT:    s_waitcnt vmcnt(0)
649; CHECK-NEXT:    s_setpc_b64 s[30:31]
650  %voffset.add = add i32 %voffset, 60
651  call void @llvm.amdgcn.raw.ptr.buffer.store.i64(i64 %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
652  ret void
653}
654
; Stores a <2 x i64> at %voffset + 60 from a plain `define void` function. The
; expected output is a buffer_store_dwordx4 of v[0:3] with the +60 folded into
; `offset:60`, followed by a trailing s_waitcnt and s_setpc_b64; on verde the
; trailing wait also includes expcnt(0).
655define void @buffer_store_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x i64> %data, i32 %voffset) #0 {
656; VERDE-LABEL: buffer_store_v2i64__voffset_add:
657; VERDE:       ; %bb.0:
658; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
659; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
660; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
661; VERDE-NEXT:    s_setpc_b64 s[30:31]
662;
663; CHECK-LABEL: buffer_store_v2i64__voffset_add:
664; CHECK:       ; %bb.0:
665; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
667; CHECK-NEXT:    s_waitcnt vmcnt(0)
668; CHECK-NEXT:    s_setpc_b64 s[30:31]
669  %voffset.add = add i32 %voffset, 60
670  call void @llvm.amdgcn.raw.ptr.buffer.store.v2i64(<2 x i64> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
671  ret void
672}
673
; Stores a default-address-space pointer through
; llvm.amdgcn.raw.ptr.buffer.store.p0, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx2 of v[0:1] with the variable
; part in v2 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
674define void @buffer_store_p0__voffset_add(ptr addrspace(8) inreg %rsrc, ptr %data, i32 %voffset) #0 {
675; VERDE-LABEL: buffer_store_p0__voffset_add:
676; VERDE:       ; %bb.0:
677; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
679; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
680; VERDE-NEXT:    s_setpc_b64 s[30:31]
681;
682; CHECK-LABEL: buffer_store_p0__voffset_add:
683; CHECK:       ; %bb.0:
684; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
685; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
686; CHECK-NEXT:    s_waitcnt vmcnt(0)
687; CHECK-NEXT:    s_setpc_b64 s[30:31]
688  %voffset.add = add i32 %voffset, 60
689  call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
690  ret void
691}
692
; Stores a <2 x ptr> (default address space) through
; llvm.amdgcn.raw.ptr.buffer.store.v2p0, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx4 of v[0:3] with the variable
; part in v4 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
693define void @buffer_store_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr> %data, i32 %voffset) #0 {
694; VERDE-LABEL: buffer_store_v2p0__voffset_add:
695; VERDE:       ; %bb.0:
696; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
697; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
698; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
699; VERDE-NEXT:    s_setpc_b64 s[30:31]
700;
701; CHECK-LABEL: buffer_store_v2p0__voffset_add:
702; CHECK:       ; %bb.0:
703; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
705; CHECK-NEXT:    s_waitcnt vmcnt(0)
706; CHECK-NEXT:    s_setpc_b64 s[30:31]
707  %voffset.add = add i32 %voffset, 60
708  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p0(<2 x ptr> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
709  ret void
710}
711
; Stores a ptr addrspace(1) through llvm.amdgcn.raw.ptr.buffer.store.p1, passing
; %voffset + 60 as the first i32 operand after the resource.
; Both check prefixes expect one buffer_store_dwordx2 of v[0:1] with the variable
; part in v2 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
712define void @buffer_store_p1__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %data, i32 %voffset) #0 {
713; VERDE-LABEL: buffer_store_p1__voffset_add:
714; VERDE:       ; %bb.0:
715; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
717; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
718; VERDE-NEXT:    s_setpc_b64 s[30:31]
719;
720; CHECK-LABEL: buffer_store_p1__voffset_add:
721; CHECK:       ; %bb.0:
722; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
724; CHECK-NEXT:    s_waitcnt vmcnt(0)
725; CHECK-NEXT:    s_setpc_b64 s[30:31]
726  %voffset.add = add i32 %voffset, 60
727  call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
728  ret void
729}
730
; Stores a <2 x ptr addrspace(1)> through
; llvm.amdgcn.raw.ptr.buffer.store.v2p1, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx4 of v[0:3] with the variable
; part in v4 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
731define void @buffer_store_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(1)> %data, i32 %voffset) #0 {
732; VERDE-LABEL: buffer_store_v2p1__voffset_add:
733; VERDE:       ; %bb.0:
734; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
736; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
737; VERDE-NEXT:    s_setpc_b64 s[30:31]
738;
739; CHECK-LABEL: buffer_store_v2p1__voffset_add:
740; CHECK:       ; %bb.0:
741; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
743; CHECK-NEXT:    s_waitcnt vmcnt(0)
744; CHECK-NEXT:    s_setpc_b64 s[30:31]
745  %voffset.add = add i32 %voffset, 60
746  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
747  ret void
748}
749
; Stores a ptr addrspace(4) through llvm.amdgcn.raw.ptr.buffer.store.p4, passing
; %voffset + 60 as the first i32 operand after the resource.
; Both check prefixes expect one buffer_store_dwordx2 of v[0:1] with the variable
; part in v2 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
750define void @buffer_store_p4__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(4) %data, i32 %voffset) #0 {
751; VERDE-LABEL: buffer_store_p4__voffset_add:
752; VERDE:       ; %bb.0:
753; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
755; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
756; VERDE-NEXT:    s_setpc_b64 s[30:31]
757;
758; CHECK-LABEL: buffer_store_p4__voffset_add:
759; CHECK:       ; %bb.0:
760; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
762; CHECK-NEXT:    s_waitcnt vmcnt(0)
763; CHECK-NEXT:    s_setpc_b64 s[30:31]
764  %voffset.add = add i32 %voffset, 60
765  call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
766  ret void
767}
768
; Stores a <2 x ptr addrspace(4)> through
; llvm.amdgcn.raw.ptr.buffer.store.v2p4, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx4 of v[0:3] with the variable
; part in v4 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
769define void @buffer_store_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(4)> %data, i32 %voffset) #0 {
770; VERDE-LABEL: buffer_store_v2p4__voffset_add:
771; VERDE:       ; %bb.0:
772; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
774; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
775; VERDE-NEXT:    s_setpc_b64 s[30:31]
776;
777; CHECK-LABEL: buffer_store_v2p4__voffset_add:
778; CHECK:       ; %bb.0:
779; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
780; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
781; CHECK-NEXT:    s_waitcnt vmcnt(0)
782; CHECK-NEXT:    s_setpc_b64 s[30:31]
783  %voffset.add = add i32 %voffset, 60
784  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p4(<2 x ptr addrspace(4)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
785  ret void
786}
787
; Stores a ptr addrspace(999) through llvm.amdgcn.raw.ptr.buffer.store.p999,
; passing %voffset + 60 as the first i32 operand after the resource.
; Both check prefixes expect one buffer_store_dwordx2 of v[0:1] with the variable
; part in v2 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
788define void @buffer_store_p999__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(999) %data, i32 %voffset) #0 {
789; VERDE-LABEL: buffer_store_p999__voffset_add:
790; VERDE:       ; %bb.0:
791; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
792; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
793; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
794; VERDE-NEXT:    s_setpc_b64 s[30:31]
795;
796; CHECK-LABEL: buffer_store_p999__voffset_add:
797; CHECK:       ; %bb.0:
798; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
799; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
800; CHECK-NEXT:    s_waitcnt vmcnt(0)
801; CHECK-NEXT:    s_setpc_b64 s[30:31]
802  %voffset.add = add i32 %voffset, 60
803  call void @llvm.amdgcn.raw.ptr.buffer.store.p999(ptr addrspace(999) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
804  ret void
805}
806
; Stores a <2 x ptr addrspace(999)> through
; llvm.amdgcn.raw.ptr.buffer.store.v2p999, passing %voffset + 60 as the first
; i32 operand after the resource.
; Both check prefixes expect one buffer_store_dwordx4 of v[0:3] with the variable
; part in v4 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
807define void @buffer_store_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(999)> %data, i32 %voffset) #0 {
808; VERDE-LABEL: buffer_store_v2p999__voffset_add:
809; VERDE:       ; %bb.0:
810; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
812; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
813; VERDE-NEXT:    s_setpc_b64 s[30:31]
814;
815; CHECK-LABEL: buffer_store_v2p999__voffset_add:
816; CHECK:       ; %bb.0:
817; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
818; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
819; CHECK-NEXT:    s_waitcnt vmcnt(0)
820; CHECK-NEXT:    s_setpc_b64 s[30:31]
821  %voffset.add = add i32 %voffset, 60
822  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p999(<2 x ptr addrspace(999)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
823  ret void
824}
825
; Stores a ptr addrspace(2) through llvm.amdgcn.raw.ptr.buffer.store.p2, passing
; %voffset + 60 as the first i32 operand after the resource.
; Both check prefixes expect one buffer_store_dword of v0 with the variable part
; in v1 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
826define void @buffer_store_p2__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(2) %data, i32 %voffset) #0 {
827; VERDE-LABEL: buffer_store_p2__voffset_add:
828; VERDE:       ; %bb.0:
829; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830; VERDE-NEXT:    buffer_store_dword v0, v1, s[16:19], 0 offen offset:60
831; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
832; VERDE-NEXT:    s_setpc_b64 s[30:31]
833;
834; CHECK-LABEL: buffer_store_p2__voffset_add:
835; CHECK:       ; %bb.0:
836; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837; CHECK-NEXT:    buffer_store_dword v0, v1, s[16:19], 0 offen offset:60
838; CHECK-NEXT:    s_waitcnt vmcnt(0)
839; CHECK-NEXT:    s_setpc_b64 s[30:31]
840  %voffset.add = add i32 %voffset, 60
841  call void @llvm.amdgcn.raw.ptr.buffer.store.p2(ptr addrspace(2) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
842  ret void
843}
844
; Stores a <2 x ptr addrspace(2)> through
; llvm.amdgcn.raw.ptr.buffer.store.v2p2, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx2 of v[0:1] with the variable
; part in v2 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
845define void @buffer_store_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(2)> %data, i32 %voffset) #0 {
846; VERDE-LABEL: buffer_store_v2p2__voffset_add:
847; VERDE:       ; %bb.0:
848; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
849; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
850; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
851; VERDE-NEXT:    s_setpc_b64 s[30:31]
852;
853; CHECK-LABEL: buffer_store_v2p2__voffset_add:
854; CHECK:       ; %bb.0:
855; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
856; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
857; CHECK-NEXT:    s_waitcnt vmcnt(0)
858; CHECK-NEXT:    s_setpc_b64 s[30:31]
859  %voffset.add = add i32 %voffset, 60
860  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p2(<2 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
861  ret void
862}
863
; Stores a <3 x ptr addrspace(2)> through
; llvm.amdgcn.raw.ptr.buffer.store.v3p2, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx3 of v[0:2] with the variable
; part in v3 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
864define void @buffer_store_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(2)> %data, i32 %voffset) #0 {
865; VERDE-LABEL: buffer_store_v3p2__voffset_add:
866; VERDE:       ; %bb.0:
867; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
868; VERDE-NEXT:    buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60
869; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
870; VERDE-NEXT:    s_setpc_b64 s[30:31]
871;
872; CHECK-LABEL: buffer_store_v3p2__voffset_add:
873; CHECK:       ; %bb.0:
874; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
875; CHECK-NEXT:    buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60
876; CHECK-NEXT:    s_waitcnt vmcnt(0)
877; CHECK-NEXT:    s_setpc_b64 s[30:31]
878  %voffset.add = add i32 %voffset, 60
879  call void @llvm.amdgcn.raw.ptr.buffer.store.v3p2(<3 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
880  ret void
881}
882
; Stores a <4 x ptr addrspace(2)> through
; llvm.amdgcn.raw.ptr.buffer.store.v4p2, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx4 of v[0:3] with the variable
; part in v4 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
883define void @buffer_store_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(2)> %data, i32 %voffset) #0 {
884; VERDE-LABEL: buffer_store_v4p2__voffset_add:
885; VERDE:       ; %bb.0:
886; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
887; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
888; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
889; VERDE-NEXT:    s_setpc_b64 s[30:31]
890;
891; CHECK-LABEL: buffer_store_v4p2__voffset_add:
892; CHECK:       ; %bb.0:
893; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
894; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
895; CHECK-NEXT:    s_waitcnt vmcnt(0)
896; CHECK-NEXT:    s_setpc_b64 s[30:31]
897  %voffset.add = add i32 %voffset, 60
898  call void @llvm.amdgcn.raw.ptr.buffer.store.v4p2(<4 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
899  ret void
900}
901
; Stores a ptr addrspace(3) through llvm.amdgcn.raw.ptr.buffer.store.p3, passing
; %voffset + 60 as the first i32 operand after the resource.
; Both check prefixes expect one buffer_store_dword of v0 with the variable part
; in v1 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
902define void @buffer_store_p3__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %data, i32 %voffset) #0 {
903; VERDE-LABEL: buffer_store_p3__voffset_add:
904; VERDE:       ; %bb.0:
905; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
906; VERDE-NEXT:    buffer_store_dword v0, v1, s[16:19], 0 offen offset:60
907; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
908; VERDE-NEXT:    s_setpc_b64 s[30:31]
909;
910; CHECK-LABEL: buffer_store_p3__voffset_add:
911; CHECK:       ; %bb.0:
912; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913; CHECK-NEXT:    buffer_store_dword v0, v1, s[16:19], 0 offen offset:60
914; CHECK-NEXT:    s_waitcnt vmcnt(0)
915; CHECK-NEXT:    s_setpc_b64 s[30:31]
916  %voffset.add = add i32 %voffset, 60
917  call void @llvm.amdgcn.raw.ptr.buffer.store.p3(ptr addrspace(3) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
918  ret void
919}
920
; Stores a <2 x ptr addrspace(3)> through
; llvm.amdgcn.raw.ptr.buffer.store.v2p3, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx2 of v[0:1] with the variable
; part in v2 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
921define void @buffer_store_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(3)> %data, i32 %voffset) #0 {
922; VERDE-LABEL: buffer_store_v2p3__voffset_add:
923; VERDE:       ; %bb.0:
924; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
925; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
926; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
927; VERDE-NEXT:    s_setpc_b64 s[30:31]
928;
929; CHECK-LABEL: buffer_store_v2p3__voffset_add:
930; CHECK:       ; %bb.0:
931; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
932; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
933; CHECK-NEXT:    s_waitcnt vmcnt(0)
934; CHECK-NEXT:    s_setpc_b64 s[30:31]
935  %voffset.add = add i32 %voffset, 60
936  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p3(<2 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
937  ret void
938}
939
; Stores a <3 x ptr addrspace(3)> through
; llvm.amdgcn.raw.ptr.buffer.store.v3p3, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx3 of v[0:2] with the variable
; part in v3 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
940define void @buffer_store_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(3)> %data, i32 %voffset) #0 {
941; VERDE-LABEL: buffer_store_v3p3__voffset_add:
942; VERDE:       ; %bb.0:
943; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
944; VERDE-NEXT:    buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60
945; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
946; VERDE-NEXT:    s_setpc_b64 s[30:31]
947;
948; CHECK-LABEL: buffer_store_v3p3__voffset_add:
949; CHECK:       ; %bb.0:
950; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
951; CHECK-NEXT:    buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60
952; CHECK-NEXT:    s_waitcnt vmcnt(0)
953; CHECK-NEXT:    s_setpc_b64 s[30:31]
954  %voffset.add = add i32 %voffset, 60
955  call void @llvm.amdgcn.raw.ptr.buffer.store.v3p3(<3 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
956  ret void
957}
958
; Stores a <4 x ptr addrspace(3)> through
; llvm.amdgcn.raw.ptr.buffer.store.v4p3, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx4 of v[0:3] with the variable
; part in v4 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
959define void @buffer_store_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(3)> %data, i32 %voffset) #0 {
960; VERDE-LABEL: buffer_store_v4p3__voffset_add:
961; VERDE:       ; %bb.0:
962; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
963; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
964; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
965; VERDE-NEXT:    s_setpc_b64 s[30:31]
966;
967; CHECK-LABEL: buffer_store_v4p3__voffset_add:
968; CHECK:       ; %bb.0:
969; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
971; CHECK-NEXT:    s_waitcnt vmcnt(0)
972; CHECK-NEXT:    s_setpc_b64 s[30:31]
973  %voffset.add = add i32 %voffset, 60
974  call void @llvm.amdgcn.raw.ptr.buffer.store.v4p3(<4 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
975  ret void
976}
977
; Stores a ptr addrspace(5) through llvm.amdgcn.raw.ptr.buffer.store.p5, passing
; %voffset + 60 as the first i32 operand after the resource.
; Both check prefixes expect one buffer_store_dword of v0 with the variable part
; in v1 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
978define void @buffer_store_p5__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(5) %data, i32 %voffset) #0 {
979; VERDE-LABEL: buffer_store_p5__voffset_add:
980; VERDE:       ; %bb.0:
981; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982; VERDE-NEXT:    buffer_store_dword v0, v1, s[16:19], 0 offen offset:60
983; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
984; VERDE-NEXT:    s_setpc_b64 s[30:31]
985;
986; CHECK-LABEL: buffer_store_p5__voffset_add:
987; CHECK:       ; %bb.0:
988; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
989; CHECK-NEXT:    buffer_store_dword v0, v1, s[16:19], 0 offen offset:60
990; CHECK-NEXT:    s_waitcnt vmcnt(0)
991; CHECK-NEXT:    s_setpc_b64 s[30:31]
992  %voffset.add = add i32 %voffset, 60
993  call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
994  ret void
995}
996
; Stores a <2 x ptr addrspace(5)> through
; llvm.amdgcn.raw.ptr.buffer.store.v2p5, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx2 of v[0:1] with the variable
; part in v2 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
997define void @buffer_store_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(5)> %data, i32 %voffset) #0 {
998; VERDE-LABEL: buffer_store_v2p5__voffset_add:
999; VERDE:       ; %bb.0:
1000; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1001; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
1002; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1003; VERDE-NEXT:    s_setpc_b64 s[30:31]
1004;
1005; CHECK-LABEL: buffer_store_v2p5__voffset_add:
1006; CHECK:       ; %bb.0:
1007; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1008; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
1009; CHECK-NEXT:    s_waitcnt vmcnt(0)
1010; CHECK-NEXT:    s_setpc_b64 s[30:31]
1011  %voffset.add = add i32 %voffset, 60
1012  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1013  ret void
1014}
1015
; Stores a <3 x ptr addrspace(5)> through
; llvm.amdgcn.raw.ptr.buffer.store.v3p5, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx3 of v[0:2] with the variable
; part in v3 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
1016define void @buffer_store_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(5)> %data, i32 %voffset) #0 {
1017; VERDE-LABEL: buffer_store_v3p5__voffset_add:
1018; VERDE:       ; %bb.0:
1019; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020; VERDE-NEXT:    buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60
1021; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1022; VERDE-NEXT:    s_setpc_b64 s[30:31]
1023;
1024; CHECK-LABEL: buffer_store_v3p5__voffset_add:
1025; CHECK:       ; %bb.0:
1026; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027; CHECK-NEXT:    buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60
1028; CHECK-NEXT:    s_waitcnt vmcnt(0)
1029; CHECK-NEXT:    s_setpc_b64 s[30:31]
1030  %voffset.add = add i32 %voffset, 60
1031  call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1032  ret void
1033}
1034
; Stores a <4 x ptr addrspace(5)> through
; llvm.amdgcn.raw.ptr.buffer.store.v4p5, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx4 of v[0:3] with the variable
; part in v4 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
1035define void @buffer_store_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(5)> %data, i32 %voffset) #0 {
1036; VERDE-LABEL: buffer_store_v4p5__voffset_add:
1037; VERDE:       ; %bb.0:
1038; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1039; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
1040; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1041; VERDE-NEXT:    s_setpc_b64 s[30:31]
1042;
1043; CHECK-LABEL: buffer_store_v4p5__voffset_add:
1044; CHECK:       ; %bb.0:
1045; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1046; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
1047; CHECK-NEXT:    s_waitcnt vmcnt(0)
1048; CHECK-NEXT:    s_setpc_b64 s[30:31]
1049  %voffset.add = add i32 %voffset, 60
1050  call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1051  ret void
1052}
1053
; Stores a ptr addrspace(6) through llvm.amdgcn.raw.ptr.buffer.store.p6, passing
; %voffset + 60 as the first i32 operand after the resource.
; Both check prefixes expect one buffer_store_dword of v0 with the variable part
; in v1 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
1054define void @buffer_store_p6__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(6) %data, i32 %voffset) #0 {
1055; VERDE-LABEL: buffer_store_p6__voffset_add:
1056; VERDE:       ; %bb.0:
1057; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058; VERDE-NEXT:    buffer_store_dword v0, v1, s[16:19], 0 offen offset:60
1059; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1060; VERDE-NEXT:    s_setpc_b64 s[30:31]
1061;
1062; CHECK-LABEL: buffer_store_p6__voffset_add:
1063; CHECK:       ; %bb.0:
1064; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1065; CHECK-NEXT:    buffer_store_dword v0, v1, s[16:19], 0 offen offset:60
1066; CHECK-NEXT:    s_waitcnt vmcnt(0)
1067; CHECK-NEXT:    s_setpc_b64 s[30:31]
1068  %voffset.add = add i32 %voffset, 60
1069  call void @llvm.amdgcn.raw.ptr.buffer.store.p6(ptr addrspace(6) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1070  ret void
1071}
1072
; Stores a <2 x ptr addrspace(6)> through
; llvm.amdgcn.raw.ptr.buffer.store.v2p6, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx2 of v[0:1] with the variable
; part in v2 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
1073define void @buffer_store_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(6)> %data, i32 %voffset) #0 {
1074; VERDE-LABEL: buffer_store_v2p6__voffset_add:
1075; VERDE:       ; %bb.0:
1076; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
1078; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1079; VERDE-NEXT:    s_setpc_b64 s[30:31]
1080;
1081; CHECK-LABEL: buffer_store_v2p6__voffset_add:
1082; CHECK:       ; %bb.0:
1083; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1084; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
1085; CHECK-NEXT:    s_waitcnt vmcnt(0)
1086; CHECK-NEXT:    s_setpc_b64 s[30:31]
1087  %voffset.add = add i32 %voffset, 60
1088  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p6(<2 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1089  ret void
1090}
1091
; Stores a <3 x ptr addrspace(6)> through
; llvm.amdgcn.raw.ptr.buffer.store.v3p6, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx3 of v[0:2] with the variable
; part in v3 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
1092define void @buffer_store_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(6)> %data, i32 %voffset) #0 {
1093; VERDE-LABEL: buffer_store_v3p6__voffset_add:
1094; VERDE:       ; %bb.0:
1095; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1096; VERDE-NEXT:    buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60
1097; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1098; VERDE-NEXT:    s_setpc_b64 s[30:31]
1099;
1100; CHECK-LABEL: buffer_store_v3p6__voffset_add:
1101; CHECK:       ; %bb.0:
1102; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103; CHECK-NEXT:    buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60
1104; CHECK-NEXT:    s_waitcnt vmcnt(0)
1105; CHECK-NEXT:    s_setpc_b64 s[30:31]
1106  %voffset.add = add i32 %voffset, 60
1107  call void @llvm.amdgcn.raw.ptr.buffer.store.v3p6(<3 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1108  ret void
1109}
1110
; Stores a <4 x ptr addrspace(6)> through
; llvm.amdgcn.raw.ptr.buffer.store.v4p6, passing %voffset + 60 as the first i32
; operand after the resource.
; Both check prefixes expect one buffer_store_dwordx4 of v[0:3] with the variable
; part in v4 (offen) and the constant 60 as the immediate offset:60.
; The two prefixes differ only in the s_waitcnt emitted before the return.
1111define void @buffer_store_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(6)> %data, i32 %voffset) #0 {
1112; VERDE-LABEL: buffer_store_v4p6__voffset_add:
1113; VERDE:       ; %bb.0:
1114; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1115; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
1116; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1117; VERDE-NEXT:    s_setpc_b64 s[30:31]
1118;
1119; CHECK-LABEL: buffer_store_v4p6__voffset_add:
1120; CHECK:       ; %bb.0:
1121; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1122; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
1123; CHECK-NEXT:    s_waitcnt vmcnt(0)
1124; CHECK-NEXT:    s_setpc_b64 s[30:31]
1125  %voffset.add = add i32 %voffset, 60
1126  call void @llvm.amdgcn.raw.ptr.buffer.store.v4p6(<4 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1127  ret void
1128}
1129
; Intrinsic declarations. Every store overload listed here takes the data value,
; a ptr addrspace(8) resource, and three i32 operands; the single load overload
; takes the resource plus three i32 operands and returns <4 x float>.
; NOTE(review): the f64/i64/pointer overloads called by the *__voffset_add tests
; are not in this list -- presumably the IR parser declares called intrinsics
; implicitly; confirm against the LLVM version this test targets.
1130declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32) #0
1131declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0
1132declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0
1133declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32) #0
1134declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32) #0
1135declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32) #0
1136declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #1
1137declare void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32) #0
1138declare void @llvm.amdgcn.raw.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32) #0
1139declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32) #0
1140declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32) #0
1141declare void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32) #0
1142declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16>, ptr addrspace(8), i32, i32, i32) #0
1143declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16>, ptr addrspace(8), i32, i32, i32) #0
1144
; Attribute groups: #0 (nounwind) is attached to the store declarations and to the
; *__voffset_add test functions; #1 (nounwind readonly) to the load declaration.
1145attributes #0 = { nounwind }
1146attributes #1 = { nounwind readonly }
1147