xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll (revision 704116373ae91a1b829dc3d3d269874fb27b579c)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,SI
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,VI

; Issues three v4f32 struct buffer loads from the same resource with the index
; and both offset operands set to zero. Only the final operand differs
; (0, 1, 2); the checks expect a plain load, a glc load and an slc load.
; The result aggregate starts from poison rather than undef: every field is
; overwritten by the insertvalue chain, so the placeholder never escapes.
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
; CHECK-LABEL: buffer_load:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    v_mov_b32_e32 v8, 0
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v8, s[0:3], 0 idxen
; CHECK-NEXT:    buffer_load_dwordx4 v[4:7], v8, s[0:3], 0 idxen glc
; CHECK-NEXT:    buffer_load_dwordx4 v[8:11], v8, s[0:3], 0 idxen slc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
  %data_glc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
  %data_slc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; v4f32 load with a constant 40 in the third intrinsic operand and zero
; elsewhere; the checks expect the constant to be folded into the
; instruction's offset:40 field.
define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
; CHECK-LABEL: buffer_load_immoffs:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 idxen offset:40
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 40, i32 0, i32 0)
  ret <4 x float> %data
}

; v4f32 load with constant 4 in the third operand and constant 8188 (0x1ffc)
; in the fourth. The checks expect 8188 to be materialised in an SGPR (s4)
; used as the scalar offset, and 4 to be folded into offset:4.
define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
; CHECK-LABEL: buffer_load_immoffs_large:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_movk_i32 s4, 0x1ffc
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], s4 idxen offset:4
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 4, i32 8188, i32 0)
  ret <4 x float> %data
}

; v4f32 load whose second operand (the index) is the incoming VGPR argument;
; the checks expect it to be used directly as the idxen address register.
define amdgpu_ps <4 x float> @buffer_load_idx(ptr addrspace(8) inreg, i32) {
; CHECK-LABEL: buffer_load_idx:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 idxen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0, i32 0)
  ret <4 x float> %data
}

; v4f32 load with a constant-zero index and the incoming VGPR argument as the
; third operand (the variable offset). The checks expect a two-register
; address v[0:1] to be assembled (zero index, then the offset) and the load
; to carry both idxen and offen.
define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
; CHECK-LABEL: buffer_load_ofs:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    v_mov_b32_e32 v1, v0
; CHECK-NEXT:    v_mov_b32_e32 v0, s4
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; Same shape as the variable-offset case, but the offset operand is computed
; as (argument + 60). The checks expect the +60 to be split off into the
; instruction's offset:60 field, leaving the bare argument in the address pair.
define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
; CHECK-LABEL: buffer_load_ofs_imm:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b32 s4, 0
; CHECK-NEXT:    v_mov_b32_e32 v1, v0
; CHECK-NEXT:    v_mov_b32_e32 v0, s4
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %ofs = add i32 %1, 60
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
  ret <4 x float> %data
}

; v4f32 load taking the first VGPR argument as index and the second as
; variable offset. The arguments already sit in v0 and v1, so the checks
; expect v[0:1] to be used as the address pair with no extra moves.
define amdgpu_ps <4 x float> @buffer_load_both(ptr addrspace(8) inreg, i32, i32) {
; CHECK-LABEL: buffer_load_both:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %1, i32 %2, i32 0, i32 0)
  ret <4 x float> %data
}

; As the two-argument case, but the arguments are passed to the intrinsic in
; swapped order (second argument as index, first as offset). The checks
; expect one copy (v0 -> v2) so that v[1:2] forms the address pair.
define amdgpu_ps <4 x float> @buffer_load_both_reversed(ptr addrspace(8) inreg, i32, i32) {
; CHECK-LABEL: buffer_load_both_reversed:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    v_mov_b32_e32 v2, v0
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v[1:2], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %2, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; Scalar f32 overload with variable index and offset; the checks expect a
; single-dword load (buffer_load_dword).
define amdgpu_ps float @buffer_load_x1(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
; CHECK-LABEL: buffer_load_x1:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_dword v0, v[0:1], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
  ret float %data
}

; v2f32 overload with variable index and offset; the checks expect a
; two-dword load (buffer_load_dwordx2).
define amdgpu_ps <2 x float> @buffer_load_x2(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
; CHECK-LABEL: buffer_load_x2:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
  ret <2 x float> %data
}

; The offset operand is computed as (argument + -16). Unlike the +60 case,
; the checks expect the negative constant NOT to be folded into an offset
; field: an explicit vector add is emitted instead (v_add_i32_e32 for the
; verde run, v_add_u32_e32 for the tonga run), hence the per-target prefixes.
define amdgpu_ps <4 x float> @buffer_load_negative_offset(ptr addrspace(8) inreg, i32 %ofs) {
; SI-LABEL: buffer_load_negative_offset:
; SI:       ; %bb.0: ; %main_body
; SI-NEXT:    s_mov_b32 s4, 0
; SI-NEXT:    v_add_i32_e32 v1, vcc, -16, v0
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    ; return to shader part epilog
;
; VI-LABEL: buffer_load_negative_offset:
; VI:       ; %bb.0: ; %main_body
; VI-NEXT:    s_mov_b32 s4, 0
; VI-NEXT:    v_add_u32_e32 v1, vcc, -16, v0
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_load_dwordx4 v[0:3], v[0:1], s[0:3], 0 idxen offen
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    ; return to shader part epilog
main_body:
  %ofs.1 = add i32 %ofs, -16
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs.1, i32 0, i32 0)
  ret <4 x float> %data
}

; Two LDS (addrspace(3)) stores of 0.0, 4 floats apart, are placed on either
; side of a buffer load. On the tonga run the checks expect the two stores
; to be combined into one ds_write2_b32 (offset1:4) despite the intervening
; buffer load.
; SI won't merge the DS memory operations, because of the signed offset bug,
; so the verde run keeps two separate ds_write_b32 instructions.
define amdgpu_ps float @buffer_load_mmo(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %lds) {
; SI-LABEL: buffer_load_mmo:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    v_mov_b32_e32 v2, 0
; SI-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 idxen
; SI-NEXT:    s_mov_b32 m0, -1
; SI-NEXT:    ds_write_b32 v0, v2
; SI-NEXT:    v_add_i32_e32 v0, vcc, 16, v0
; SI-NEXT:    ds_write_b32 v0, v2
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v0, v1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    ; return to shader part epilog
;
; VI-LABEL: buffer_load_mmo:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    v_mov_b32_e32 v2, 0
; VI-NEXT:    buffer_load_dword v1, v2, s[0:3], 0 idxen
; VI-NEXT:    s_mov_b32 m0, -1
; VI-NEXT:    ds_write2_b32 v0, v2, v2 offset1:4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, v1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    ; return to shader part epilog
entry:
  store float 0.0, ptr addrspace(3) %lds
  %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
  store float 0.0, ptr addrspace(3) %tmp2
  ret float %val
}

; Integer overloads (v4i32, v2i32, i32) of the load, each bitcast to the
; matching float type for the return value. The final operand is 0, 1 and 2
; respectively; the checks expect dwordx4 / dwordx2 glc / dword slc.
; The result aggregate starts from poison rather than undef: every field is
; overwritten by the insertvalue chain, so the placeholder never escapes.
define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(ptr addrspace(8) inreg) {
; CHECK-LABEL: buffer_load_int:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    v_mov_b32_e32 v6, 0
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v6, s[0:3], 0 idxen
; CHECK-NEXT:    buffer_load_dwordx2 v[4:5], v6, s[0:3], 0 idxen glc
; CHECK-NEXT:    buffer_load_dword v6, v6, s[0:3], 0 idxen slc
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
  %data_glc = call <2 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v2i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
  %data_slc = call i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
  %fdata = bitcast <4 x i32> %data to <4 x float>
  %fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
  %fdata_slc = bitcast i32 %data_slc to float
  %r0 = insertvalue {<4 x float>, <2 x float>, float} poison, <4 x float> %fdata, 0
  %r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
  %r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
  ret {<4 x float>, <2 x float>, float} %r2
}

; i8 load that is zero-extended to i32 and converted with uitofp. The checks
; expect an unsigned byte load (buffer_load_ubyte) followed by the dedicated
; byte-to-float conversion v_cvt_f32_ubyte0_e32.
define amdgpu_ps float @struct_ptr_buffer_load_ubyte(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
; CHECK-LABEL: struct_ptr_buffer_load_ubyte:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_ubyte v0, v[0:1], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %tmp = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
  %tmp2 = zext i8 %tmp to i32
  %val = uitofp i32 %tmp2 to float
  ret float %val
}

; i16 load that is zero-extended to i32 and converted with uitofp. The checks
; expect the zext to be absorbed into an unsigned short load
; (buffer_load_ushort) followed by v_cvt_f32_u32_e32.
define amdgpu_ps float @struct_ptr_buffer_load_ushort(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
; CHECK-LABEL: struct_ptr_buffer_load_ushort:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_ushort v0, v[0:1], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v0
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %tmp = call i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
  %tmp2 = zext i16 %tmp to i32
  %val = uitofp i32 %tmp2 to float
  ret float %val
}

; i8 load that is sign-extended to i32 and converted with sitofp. The checks
; expect the sext to be absorbed into a signed byte load (buffer_load_sbyte)
; followed by v_cvt_f32_i32_e32.
define amdgpu_ps float @struct_ptr_buffer_load_sbyte(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
; CHECK-LABEL: struct_ptr_buffer_load_sbyte:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_sbyte v0, v[0:1], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cvt_f32_i32_e32 v0, v0
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %tmp = call i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
  %tmp2 = sext i8 %tmp to i32
  %val = sitofp i32 %tmp2 to float
  ret float %val
}

; i16 load that is sign-extended to i32 and converted with sitofp. The checks
; expect the sext to be absorbed into a signed short load
; (buffer_load_sshort) followed by v_cvt_f32_i32_e32.
define amdgpu_ps float @struct_ptr_buffer_load_sshort(ptr addrspace(8) inreg %rsrc, i32 %idx, i32 %ofs) {
; CHECK-LABEL: struct_ptr_buffer_load_sshort:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_sshort v0, v[0:1], s[0:3], 0 idxen offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    v_cvt_f32_i32_e32 v0, v0
; CHECK-NEXT:    ; return to shader part epilog
main_body:
  %tmp = call i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %idx, i32 %ofs, i32 0, i32 0)
  %tmp2 = sext i16 %tmp to i32
  %val = sitofp i32 %tmp2 to float
  ret float %val
}

; half overload, indexed by %idx, with the loaded value stored to LDS. The
; checks expect a 16-bit load (buffer_load_ushort) and a 16-bit LDS store
; (ds_write_b16).
define amdgpu_ps void @struct_ptr_buffer_load_f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
; CHECK-LABEL: struct_ptr_buffer_load_f16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_ushort v1, v1, s[0:3], 0 idxen
; CHECK-NEXT:    s_mov_b32 m0, -1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ds_write_b16 v0, v1
; CHECK-NEXT:    s_endpgm
main_body:
  %val = call half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
  store half %val, ptr addrspace(3) %ptr
  ret void
}

; <2 x half> overload, indexed by %idx, with the loaded value stored to LDS.
; The checks expect a single-dword load and a 32-bit LDS store
; (ds_write_b32).
define amdgpu_ps void @struct_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
; CHECK-LABEL: struct_ptr_buffer_load_v2f16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 idxen
; CHECK-NEXT:    s_mov_b32 m0, -1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ds_write_b32 v0, v1
; CHECK-NEXT:    s_endpgm
main_body:
  %val = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
  store <2 x half> %val, ptr addrspace(3) %ptr
  ret void
}

; <4 x half> overload, indexed by %idx, with the loaded value stored to LDS.
; The checks expect a two-dword load and a 64-bit LDS store (ds_write_b64).
define amdgpu_ps void @struct_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
; CHECK-LABEL: struct_ptr_buffer_load_v4f16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_dwordx2 v[1:2], v1, s[0:3], 0 idxen
; CHECK-NEXT:    s_mov_b32 m0, -1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ds_write_b64 v0, v[1:2]
; CHECK-NEXT:    s_endpgm
main_body:
  %val = call <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
  store <4 x half> %val, ptr addrspace(3) %ptr
  ret void
}

; i16 overload, indexed by %idx, with the loaded value stored to LDS without
; any extension. The checks expect a 16-bit load (buffer_load_ushort) and a
; 16-bit LDS store (ds_write_b16).
define amdgpu_ps void @struct_ptr_buffer_load_i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
; CHECK-LABEL: struct_ptr_buffer_load_i16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_ushort v1, v1, s[0:3], 0 idxen
; CHECK-NEXT:    s_mov_b32 m0, -1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ds_write_b16 v0, v1
; CHECK-NEXT:    s_endpgm
main_body:
  %val = call i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
  store i16 %val, ptr addrspace(3) %ptr
  ret void
}

; <2 x i16> overload, indexed by %idx, with the loaded value stored to LDS.
; The checks expect a single-dword load and a 32-bit LDS store
; (ds_write_b32).
define amdgpu_ps void @struct_ptr_buffer_load_v2i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
; CHECK-LABEL: struct_ptr_buffer_load_v2i16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_dword v1, v1, s[0:3], 0 idxen
; CHECK-NEXT:    s_mov_b32 m0, -1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ds_write_b32 v0, v1
; CHECK-NEXT:    s_endpgm
main_body:
  %val = call <2 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v2i16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
  store <2 x i16> %val, ptr addrspace(3) %ptr
  ret void
}

; <4 x i16> overload, indexed by %idx, with the loaded value stored to LDS.
; The checks expect a two-dword load and a 64-bit LDS store (ds_write_b64).
define amdgpu_ps void @struct_ptr_buffer_load_v4i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
; CHECK-LABEL: struct_ptr_buffer_load_v4i16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_load_dwordx2 v[1:2], v1, s[0:3], 0 idxen
; CHECK-NEXT:    s_mov_b32 m0, -1
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    ds_write_b64 v0, v[1:2]
; CHECK-NEXT:    s_endpgm
main_body:
  %val = call <4 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v4i16(ptr addrspace(8) %rsrc, i32 %idx, i32 0, i32 0, i32 0)
  store <4 x i16> %val, ptr addrspace(3) %ptr
  ret void
}

; Declarations of the intrinsic overloads called by the test functions in this
; file. Every overload takes a ptr addrspace(8) resource followed by four i32
; operands.
; NOTE(review): @llvm.amdgcn.exp.f32 is not called by any function visible in
; this file; it is kept so the set of declarations is unchanged.
declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #0
declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32, i32) #0
declare <2 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32, i32) #0
declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) #0

declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #0
declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #0
declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #0

declare i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32, i32) #0
declare <2 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32, i32) #0
declare <4 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32, i32) #0

; memory(read) is the current spelling of the legacy function-level
; `readonly` attribute.
attributes #0 = { nounwind memory(read) }
