xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=PREGFX10
3;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=PREGFX10
4;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10
5;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11
6
; Baseline test: three <4 x float> raw pointer buffer loads from the same
; resource with the second and third operands 0 and the fourth operand set to
; 0, 1 and 2. The checks expect a plain load, a load with `glc`, and a load with
; `slc` respectively on every target (GFX10/GFX11 additionally group them under
; `s_clause 0x2`).
7define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
8; PREGFX10-LABEL: buffer_load:
9; PREGFX10:       ; %bb.0: ; %main_body
10; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
11; PREGFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
12; PREGFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
13; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
14; PREGFX10-NEXT:    ; return to shader part epilog
15;
16; GFX10-LABEL: buffer_load:
17; GFX10:       ; %bb.0: ; %main_body
18; GFX10-NEXT:    s_clause 0x2
19; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
20; GFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
21; GFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
22; GFX10-NEXT:    s_waitcnt vmcnt(0)
23; GFX10-NEXT:    ; return to shader part epilog
24;
25; GFX11-LABEL: buffer_load:
26; GFX11:       ; %bb.0: ; %main_body
27; GFX11-NEXT:    s_clause 0x2
28; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
29; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 glc
30; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 slc
31; GFX11-NEXT:    s_waitcnt vmcnt(0)
32; GFX11-NEXT:    ; return to shader part epilog
33main_body:
34  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0)
35  %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 1)
36  %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 2)
37  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
38  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
39  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
40  ret {<4 x float>, <4 x float>, <4 x float>} %r2
41}
42
; Same shape as @buffer_load, but the fourth operand is 4, 5 and 6 (the values
; 0, 1, 2 with bit 2 additionally set). The PREGFX10 checks expect exactly the
; same output as for 0/1/2 (no `dlc` modifier is printed), while the GFX10 and
; GFX11 checks expect `dlc` appended to each load.
43define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_dlc(ptr addrspace(8) inreg) {
44; PREGFX10-LABEL: buffer_load_dlc:
45; PREGFX10:       ; %bb.0: ; %main_body
46; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
47; PREGFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
48; PREGFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
49; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
50; PREGFX10-NEXT:    ; return to shader part epilog
51;
52; GFX10-LABEL: buffer_load_dlc:
53; GFX10:       ; %bb.0: ; %main_body
54; GFX10-NEXT:    s_clause 0x2
55; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 dlc
56; GFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc
57; GFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc dlc
58; GFX10-NEXT:    s_waitcnt vmcnt(0)
59; GFX10-NEXT:    ; return to shader part epilog
60;
61; GFX11-LABEL: buffer_load_dlc:
62; GFX11:       ; %bb.0: ; %main_body
63; GFX11-NEXT:    s_clause 0x2
64; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 dlc
65; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc
66; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 slc dlc
67; GFX11-NEXT:    s_waitcnt vmcnt(0)
68; GFX11-NEXT:    ; return to shader part epilog
69main_body:
70  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 4)
71  %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 5)
72  %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 6)
73  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
74  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
75  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
76  ret {<4 x float>, <4 x float>, <4 x float>} %r2
77}
78
; Same shape as @buffer_load, but the fourth operand is -2147483648,
; -2147483647 and -2147483646, i.e. the values 0, 1, 2 with bit 31 set
; (presumably the "volatile" flag, going by the test name -- confirm against the
; intrinsic documentation). The checks expect every load to carry `glc` on
; PREGFX10 and `glc dlc` on GFX10/GFX11; the third load keeps its `slc` as well.
79define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_volatile(ptr addrspace(8) inreg) {
80; PREGFX10-LABEL: buffer_load_volatile:
81; PREGFX10:       ; %bb.0: ; %main_body
82; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc
83; PREGFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
84; PREGFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc
85; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
86; PREGFX10-NEXT:    ; return to shader part epilog
87;
88; GFX10-LABEL: buffer_load_volatile:
89; GFX10:       ; %bb.0: ; %main_body
90; GFX10-NEXT:    s_clause 0x2
91; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc dlc
92; GFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc
93; GFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc dlc
94; GFX10-NEXT:    s_waitcnt vmcnt(0)
95; GFX10-NEXT:    ; return to shader part epilog
96;
97; GFX11-LABEL: buffer_load_volatile:
98; GFX11:       ; %bb.0: ; %main_body
99; GFX11-NEXT:    s_clause 0x2
100; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc
101; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc
102; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 glc slc dlc
103; GFX11-NEXT:    s_waitcnt vmcnt(0)
104; GFX11-NEXT:    ; return to shader part epilog
105main_body:
106  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483648)
107  %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483647)
108  %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483646)
109  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
110  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
111  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
112  ret {<4 x float>, <4 x float>, <4 x float>} %r2
113}
114
; A constant 40 in the second operand (third operand 0): the checks expect it to
; be folded into the instruction's immediate field (`off ... offset:40`) with no
; address register.
115define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
116; PREGFX10-LABEL: buffer_load_immoffs:
117; PREGFX10:       ; %bb.0: ; %main_body
118; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
119; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
120; PREGFX10-NEXT:    ; return to shader part epilog
121;
122; GFX10-LABEL: buffer_load_immoffs:
123; GFX10:       ; %bb.0: ; %main_body
124; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
125; GFX10-NEXT:    s_waitcnt vmcnt(0)
126; GFX10-NEXT:    ; return to shader part epilog
127;
128; GFX11-LABEL: buffer_load_immoffs:
129; GFX11:       ; %bb.0: ; %main_body
130; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:40
131; GFX11-NEXT:    s_waitcnt vmcnt(0)
132; GFX11-NEXT:    ; return to shader part epilog
133main_body:
134  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 40, i32 0, i32 0)
135  ret <4 x float> %data
136}
137
; Constant 4 in the second operand and constant 8188 (0x1ffc) in the third
; operand. The checks expect the 8188 to be materialized into an SGPR
; (`s_movk_i32 s4, 0x1ffc`) and used as the scalar offset, while the 4 goes into
; the immediate field (`offset:4`).
138define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
139; PREGFX10-LABEL: buffer_load_immoffs_large:
140; PREGFX10:       ; %bb.0: ; %main_body
141; PREGFX10-NEXT:    s_movk_i32 s4, 0x1ffc
142; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], s4 offset:4
143; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
144; PREGFX10-NEXT:    ; return to shader part epilog
145;
146; GFX10-LABEL: buffer_load_immoffs_large:
147; GFX10:       ; %bb.0: ; %main_body
148; GFX10-NEXT:    s_movk_i32 s4, 0x1ffc
149; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], s4 offset:4
150; GFX10-NEXT:    s_waitcnt vmcnt(0)
151; GFX10-NEXT:    ; return to shader part epilog
152;
153; GFX11-LABEL: buffer_load_immoffs_large:
154; GFX11:       ; %bb.0: ; %main_body
155; GFX11-NEXT:    s_movk_i32 s4, 0x1ffc
156; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], s4 offset:4
157; GFX11-NEXT:    s_waitcnt vmcnt(0)
158; GFX11-NEXT:    ; return to shader part epilog
159main_body:
160  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 4, i32 8188, i32 0)
161  ret <4 x float> %data
162}
163
; A non-constant second operand (the function's i32 argument): the checks expect
; it to be passed in a VGPR (`v0`) with the `offen` modifier and no immediate
; offset.
164define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
165; PREGFX10-LABEL: buffer_load_ofs:
166; PREGFX10:       ; %bb.0: ; %main_body
167; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
168; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
169; PREGFX10-NEXT:    ; return to shader part epilog
170;
171; GFX10-LABEL: buffer_load_ofs:
172; GFX10:       ; %bb.0: ; %main_body
173; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
174; GFX10-NEXT:    s_waitcnt vmcnt(0)
175; GFX10-NEXT:    ; return to shader part epilog
176;
177; GFX11-LABEL: buffer_load_ofs:
178; GFX11:       ; %bb.0: ; %main_body
179; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
180; GFX11-NEXT:    s_waitcnt vmcnt(0)
181; GFX11-NEXT:    ; return to shader part epilog
182main_body:
183  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0)
184  ret <4 x float> %data
185}
186
; Variable offset plus a constant: the second operand is `%1 + 60`. The checks
; expect no separate add instruction; the variable part stays in `v0` (`offen`)
; and the constant 60 is folded into `offset:60`.
187define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
188; PREGFX10-LABEL: buffer_load_ofs_imm:
189; PREGFX10:       ; %bb.0: ; %main_body
190; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
191; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
192; PREGFX10-NEXT:    ; return to shader part epilog
193;
194; GFX10-LABEL: buffer_load_ofs_imm:
195; GFX10:       ; %bb.0: ; %main_body
196; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
197; GFX10-NEXT:    s_waitcnt vmcnt(0)
198; GFX10-NEXT:    ; return to shader part epilog
199;
200; GFX11-LABEL: buffer_load_ofs_imm:
201; GFX11:       ; %bb.0: ; %main_body
202; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
203; GFX11-NEXT:    s_waitcnt vmcnt(0)
204; GFX11-NEXT:    ; return to shader part epilog
205main_body:
206  %ofs = add i32 %1, 60
207  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %ofs, i32 0, i32 0)
208  ret <4 x float> %data
209}
210
; Constant second operand 4092 (0xffc): the checks expect the whole value to be
; encoded as the immediate (`off ... offset:4092`) with no register needed.
211define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
212; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
213; PREGFX10:       ; %bb.0: ; %main_body
214; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
215; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
216; PREGFX10-NEXT:    ; return to shader part epilog
217;
218; GFX10-LABEL: buffer_load_voffset_large_12bit:
219; GFX10:       ; %bb.0: ; %main_body
220; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
221; GFX10-NEXT:    s_waitcnt vmcnt(0)
222; GFX10-NEXT:    ; return to shader part epilog
223;
224; GFX11-LABEL: buffer_load_voffset_large_12bit:
225; GFX11:       ; %bb.0: ; %main_body
226; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4092
227; GFX11-NEXT:    s_waitcnt vmcnt(0)
228; GFX11-NEXT:    ; return to shader part epilog
229main_body:
230  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 4092, i32 0, i32 0)
231  ret <4 x float> %data
232}
233
; Constant second operand 8188 (0x1ffc): the checks expect it to be split into
; 0x1000 moved into a VGPR (`offen`) plus an immediate `offset:4092`
; (0x1000 + 4092 = 8188).
234define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
235; PREGFX10-LABEL: buffer_load_voffset_large_13bit:
236; PREGFX10:       ; %bb.0: ; %main_body
237; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
238; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
239; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
240; PREGFX10-NEXT:    ; return to shader part epilog
241;
242; GFX10-LABEL: buffer_load_voffset_large_13bit:
243; GFX10:       ; %bb.0: ; %main_body
244; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
245; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
246; GFX10-NEXT:    s_waitcnt vmcnt(0)
247; GFX10-NEXT:    ; return to shader part epilog
248;
249; GFX11-LABEL: buffer_load_voffset_large_13bit:
250; GFX11:       ; %bb.0: ; %main_body
251; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
252; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
253; GFX11-NEXT:    s_waitcnt vmcnt(0)
254; GFX11-NEXT:    ; return to shader part epilog
255main_body:
256  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 8188, i32 0, i32 0)
257  ret <4 x float> %data
258}
259
; Constant second operand 65532 (0xfffc): the checks expect it to be split into
; 0xf000 moved into a VGPR (`offen`) plus an immediate `offset:4092`.
260define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
261; PREGFX10-LABEL: buffer_load_voffset_large_16bit:
262; PREGFX10:       ; %bb.0: ; %main_body
263; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
264; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
265; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
266; PREGFX10-NEXT:    ; return to shader part epilog
267;
268; GFX10-LABEL: buffer_load_voffset_large_16bit:
269; GFX10:       ; %bb.0: ; %main_body
270; GFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
271; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
272; GFX10-NEXT:    s_waitcnt vmcnt(0)
273; GFX10-NEXT:    ; return to shader part epilog
274;
275; GFX11-LABEL: buffer_load_voffset_large_16bit:
276; GFX11:       ; %bb.0: ; %main_body
277; GFX11-NEXT:    v_mov_b32_e32 v0, 0xf000
278; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
279; GFX11-NEXT:    s_waitcnt vmcnt(0)
280; GFX11-NEXT:    ; return to shader part epilog
281main_body:
282  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 65532, i32 0, i32 0)
283  ret <4 x float> %data
284}
285
; Constant second operand 8388604 (0x7ffffc): the checks expect it to be split
; into 0x7ff000 moved into a VGPR (`offen`) plus an immediate `offset:4092`.
286define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
287; PREGFX10-LABEL: buffer_load_voffset_large_23bit:
288; PREGFX10:       ; %bb.0: ; %main_body
289; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
290; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
291; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
292; PREGFX10-NEXT:    ; return to shader part epilog
293;
294; GFX10-LABEL: buffer_load_voffset_large_23bit:
295; GFX10:       ; %bb.0: ; %main_body
296; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
297; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
298; GFX10-NEXT:    s_waitcnt vmcnt(0)
299; GFX10-NEXT:    ; return to shader part epilog
300;
301; GFX11-LABEL: buffer_load_voffset_large_23bit:
302; GFX11:       ; %bb.0: ; %main_body
303; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7ff000
304; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
305; GFX11-NEXT:    s_waitcnt vmcnt(0)
306; GFX11-NEXT:    ; return to shader part epilog
307main_body:
308  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 8388604, i32 0, i32 0)
309  ret <4 x float> %data
310}
311
; Constant second operand 16777212 (0xfffffc): the checks expect it to be split
; into 0xfff000 moved into a VGPR (`offen`) plus an immediate `offset:4092`.
312define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
313; PREGFX10-LABEL: buffer_load_voffset_large_24bit:
314; PREGFX10:       ; %bb.0: ; %main_body
315; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
316; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
317; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
318; PREGFX10-NEXT:    ; return to shader part epilog
319;
320; GFX10-LABEL: buffer_load_voffset_large_24bit:
321; GFX10:       ; %bb.0: ; %main_body
322; GFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
323; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
324; GFX10-NEXT:    s_waitcnt vmcnt(0)
325; GFX10-NEXT:    ; return to shader part epilog
326;
327; GFX11-LABEL: buffer_load_voffset_large_24bit:
328; GFX11:       ; %bb.0: ; %main_body
329; GFX11-NEXT:    v_mov_b32_e32 v0, 0xfff000
330; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
331; GFX11-NEXT:    s_waitcnt vmcnt(0)
332; GFX11-NEXT:    ; return to shader part epilog
333main_body:
334  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 16777212, i32 0, i32 0)
335  ret <4 x float> %data
336}
337
338
; Single-float variant of the intrinsic (`.f32`) with a variable offset: the
; checks expect a one-dword load (`buffer_load_dword`, spelled
; `buffer_load_b32` on GFX11) using `offen`.
339define amdgpu_ps float @buffer_load_x1(ptr addrspace(8) inreg %rsrc, i32 %ofs) {
340; PREGFX10-LABEL: buffer_load_x1:
341; PREGFX10:       ; %bb.0: ; %main_body
342; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
343; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
344; PREGFX10-NEXT:    ; return to shader part epilog
345;
346; GFX10-LABEL: buffer_load_x1:
347; GFX10:       ; %bb.0: ; %main_body
348; GFX10-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
349; GFX10-NEXT:    s_waitcnt vmcnt(0)
350; GFX10-NEXT:    ; return to shader part epilog
351;
352; GFX11-LABEL: buffer_load_x1:
353; GFX11:       ; %bb.0: ; %main_body
354; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
355; GFX11-NEXT:    s_waitcnt vmcnt(0)
356; GFX11-NEXT:    ; return to shader part epilog
357main_body:
358  %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 0, i32 0)
359  ret float %data
360}
361
; Two-float variant of the intrinsic (`.v2f32`) with a variable offset: the
; checks expect a two-dword load (`buffer_load_dwordx2`, spelled
; `buffer_load_b64` on GFX11) using `offen`.
362define amdgpu_ps <2 x float> @buffer_load_x2(ptr addrspace(8) inreg %rsrc, i32 %ofs) {
363; PREGFX10-LABEL: buffer_load_x2:
364; PREGFX10:       ; %bb.0: ; %main_body
365; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
366; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
367; PREGFX10-NEXT:    ; return to shader part epilog
368;
369; GFX10-LABEL: buffer_load_x2:
370; GFX10:       ; %bb.0: ; %main_body
371; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
372; GFX10-NEXT:    s_waitcnt vmcnt(0)
373; GFX10-NEXT:    ; return to shader part epilog
374;
375; GFX11-LABEL: buffer_load_x2:
376; GFX11:       ; %bb.0: ; %main_body
377; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
378; GFX11-NEXT:    s_waitcnt vmcnt(0)
379; GFX11-NEXT:    ; return to shader part epilog
380main_body:
381  %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 0, i32 0)
382  ret <2 x float> %data
383}
384
; Variable offset plus a negative constant (`%ofs + -16`): unlike the positive
; case in @buffer_load_ofs_imm, the checks expect the add to stay a separate
; `v_add_nc_u32` instruction and the load to carry no immediate offset.
; Only GFX10 and GFX11 check lines are present for this function.
; NOTE(review): presumably the two PREGFX10 RUN lines (verde, tonga) produce
; different output here so the update script emitted no shared checks -- confirm.
385define amdgpu_ps <4 x float> @buffer_load_negative_offset(ptr addrspace(8) inreg, i32 %ofs) {
386; GFX10-LABEL: buffer_load_negative_offset:
387; GFX10:       ; %bb.0: ; %main_body
388; GFX10-NEXT:    v_add_nc_u32_e32 v0, -16, v0
389; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
390; GFX10-NEXT:    s_waitcnt vmcnt(0)
391; GFX10-NEXT:    ; return to shader part epilog
392;
393; GFX11-LABEL: buffer_load_negative_offset:
394; GFX11:       ; %bb.0: ; %main_body
395; GFX11-NEXT:    v_add_nc_u32_e32 v0, -16, v0
396; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
397; GFX11-NEXT:    s_waitcnt vmcnt(0)
398; GFX11-NEXT:    ; return to shader part epilog
399main_body:
400  %ofs.1 = add i32 %ofs, -16
401  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %ofs.1, i32 0, i32 0)
402  ret <4 x float> %data
403}
404
; A buffer load placed between two stores of 0.0 to addrspace(3) memory (%lds
; and %lds + 4 floats). The checks expect the two stores to be combined into a
; single `ds_write2_b32` / `ds_store_2addr_b32 ... offset1:4` issued after the
; buffer load, i.e. the stores are allowed to move across the load.
; Only GFX10 and GFX11 check lines are present for this function.
; NOTE(review): "mmo" presumably refers to the memory operand attached to the
; buffer load, which is what permits that reordering -- confirm.
405define amdgpu_ps float @buffer_load_mmo(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %lds) {
406; GFX10-LABEL: buffer_load_mmo:
407; GFX10:       ; %bb.0: ; %entry
408; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], 0
409; GFX10-NEXT:    v_mov_b32_e32 v2, 0
410; GFX10-NEXT:    ds_write2_b32 v0, v2, v2 offset1:4
411; GFX10-NEXT:    s_waitcnt vmcnt(0)
412; GFX10-NEXT:    v_mov_b32_e32 v0, v1
413; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
414; GFX10-NEXT:    ; return to shader part epilog
415;
416; GFX11-LABEL: buffer_load_mmo:
417; GFX11:       ; %bb.0: ; %entry
418; GFX11-NEXT:    buffer_load_b32 v1, off, s[0:3], 0
419; GFX11-NEXT:    v_mov_b32_e32 v2, 0
420; GFX11-NEXT:    ds_store_2addr_b32 v0, v2, v2 offset1:4
421; GFX11-NEXT:    s_waitcnt vmcnt(0)
422; GFX11-NEXT:    v_mov_b32_e32 v0, v1
423; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
424; GFX11-NEXT:    ; return to shader part epilog
425entry:
426  store float 0.0, ptr addrspace(3) %lds
427  %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
428  %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
429  store float 0.0, ptr addrspace(3) %tmp2
430  ret float %val
431}
432
; Load merging with a variable base: six single-float loads at %a + 4, 8, 12,
; 16 and %a + 28, 32. The checks expect the first four (contiguous) loads to be
; merged into one four-dword load at `offset:4` and the last two into one
; two-dword load at `offset:28`, all using `offen` on the unmodified base `v0`.
433define amdgpu_ps void @buffer_load_x1_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a) {
434; PREGFX10-LABEL: buffer_load_x1_offen_merged_and:
435; PREGFX10:       ; %bb.0: ; %main_body
436; PREGFX10-NEXT:    buffer_load_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
437; PREGFX10-NEXT:    buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
438; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
439; PREGFX10-NEXT:    exp mrt0 v1, v2, v3, v4 done vm
440; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
441; PREGFX10-NEXT:    exp mrt0 v5, v6, v0, v0 done vm
442; PREGFX10-NEXT:    s_endpgm
443;
444; GFX10-LABEL: buffer_load_x1_offen_merged_and:
445; GFX10:       ; %bb.0: ; %main_body
446; GFX10-NEXT:    s_clause 0x1
447; GFX10-NEXT:    buffer_load_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
448; GFX10-NEXT:    buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
449; GFX10-NEXT:    s_waitcnt vmcnt(1)
450; GFX10-NEXT:    exp mrt0 v1, v2, v3, v4 done vm
451; GFX10-NEXT:    s_waitcnt vmcnt(0)
452; GFX10-NEXT:    exp mrt0 v5, v6, v0, v0 done vm
453; GFX10-NEXT:    s_endpgm
454;
455; GFX11-LABEL: buffer_load_x1_offen_merged_and:
456; GFX11:       ; %bb.0: ; %main_body
457; GFX11-NEXT:    s_clause 0x1
458; GFX11-NEXT:    buffer_load_b128 v[1:4], v0, s[0:3], 0 offen offset:4
459; GFX11-NEXT:    buffer_load_b64 v[5:6], v0, s[0:3], 0 offen offset:28
460; GFX11-NEXT:    s_waitcnt vmcnt(1)
461; GFX11-NEXT:    exp mrt0 v1, v2, v3, v4 done
462; GFX11-NEXT:    s_waitcnt vmcnt(0)
463; GFX11-NEXT:    exp mrt0 v5, v6, v0, v0 done
464; GFX11-NEXT:    s_endpgm
465main_body:
466  %a1 = add i32 %a, 4
467  %a2 = add i32 %a, 8
468  %a3 = add i32 %a, 12
469  %a4 = add i32 %a, 16
470  %a5 = add i32 %a, 28
471  %a6 = add i32 %a, 32
472  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
473  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
474  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
475  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
476  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
477  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
478  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
479  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
480  ret void
481}
482
; Same merging scenario as @buffer_load_x1_offen_merged_and, but the offsets are
; formed with `or` instead of `add`. The base is `%inp << 6`, so its low six
; bits are zero and or-ing in 4..32 gives the same value as adding. The checks
; expect the constants to still be folded into `offset:4` / `offset:28` and the
; loads to be merged into one four-dword and one two-dword load.
483define amdgpu_ps void @buffer_load_x1_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp) {
484; PREGFX10-LABEL: buffer_load_x1_offen_merged_or:
485; PREGFX10:       ; %bb.0: ; %main_body
486; PREGFX10-NEXT:    v_lshlrev_b32_e32 v4, 6, v0
487; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v4, s[0:3], 0 offen offset:4
488; PREGFX10-NEXT:    buffer_load_dwordx2 v[4:5], v4, s[0:3], 0 offen offset:28
489; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
490; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
491; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
492; PREGFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
493; PREGFX10-NEXT:    s_endpgm
494;
495; GFX10-LABEL: buffer_load_x1_offen_merged_or:
496; GFX10:       ; %bb.0: ; %main_body
497; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 6, v0
498; GFX10-NEXT:    s_clause 0x1
499; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v6, s[0:3], 0 offen offset:4
500; GFX10-NEXT:    buffer_load_dwordx2 v[4:5], v6, s[0:3], 0 offen offset:28
501; GFX10-NEXT:    s_waitcnt vmcnt(1)
502; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
503; GFX10-NEXT:    s_waitcnt vmcnt(0)
504; GFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
505; GFX10-NEXT:    s_endpgm
506;
507; GFX11-LABEL: buffer_load_x1_offen_merged_or:
508; GFX11:       ; %bb.0: ; %main_body
509; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 6, v0
510; GFX11-NEXT:    s_clause 0x1
511; GFX11-NEXT:    buffer_load_b128 v[0:3], v4, s[0:3], 0 offen offset:4
512; GFX11-NEXT:    buffer_load_b64 v[4:5], v4, s[0:3], 0 offen offset:28
513; GFX11-NEXT:    s_waitcnt vmcnt(1)
514; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
515; GFX11-NEXT:    s_waitcnt vmcnt(0)
516; GFX11-NEXT:    exp mrt0 v4, v5, v0, v0 done
517; GFX11-NEXT:    s_endpgm
518main_body:
519  %a = shl i32 %inp, 6
520  %a1 = or i32 %a, 4
521  %a2 = or i32 %a, 8
522  %a3 = or i32 %a, 12
523  %a4 = or i32 %a, 16
524  %a5 = or i32 %a, 28
525  %a6 = or i32 %a, 32
526  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
527  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
528  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
529  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
530  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
531  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
532  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
533  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
534  ret void
535}
536
; Load merging when the fourth operand differs between loads: the same six
; offsets as @buffer_load_x1_offen_merged_and, but the loads come in pairs with
; the fourth operand 0, 1 and 3. The checks expect three two-dword loads (plain,
; `glc`, `glc slc`) -- only loads sharing the same fourth operand are merged, so
; the contiguous offsets 4..16 are not combined into one four-dword load.
537define amdgpu_ps void @buffer_load_x1_offen_merged_glc_slc(ptr addrspace(8) inreg %rsrc, i32 %a) {
538; PREGFX10-LABEL: buffer_load_x1_offen_merged_glc_slc:
539; PREGFX10:       ; %bb.0: ; %main_body
540; PREGFX10-NEXT:    buffer_load_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
541; PREGFX10-NEXT:    buffer_load_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
542; PREGFX10-NEXT:    buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
543; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
544; PREGFX10-NEXT:    exp mrt0 v1, v2, v3, v4 done vm
545; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
546; PREGFX10-NEXT:    exp mrt0 v5, v6, v0, v0 done vm
547; PREGFX10-NEXT:    s_endpgm
548;
549; GFX10-LABEL: buffer_load_x1_offen_merged_glc_slc:
550; GFX10:       ; %bb.0: ; %main_body
551; GFX10-NEXT:    s_clause 0x2
552; GFX10-NEXT:    buffer_load_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
553; GFX10-NEXT:    buffer_load_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
554; GFX10-NEXT:    buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
555; GFX10-NEXT:    s_waitcnt vmcnt(1)
556; GFX10-NEXT:    exp mrt0 v1, v2, v3, v4 done vm
557; GFX10-NEXT:    s_waitcnt vmcnt(0)
558; GFX10-NEXT:    exp mrt0 v5, v6, v0, v0 done vm
559; GFX10-NEXT:    s_endpgm
560;
561; GFX11-LABEL: buffer_load_x1_offen_merged_glc_slc:
562; GFX11:       ; %bb.0: ; %main_body
563; GFX11-NEXT:    s_clause 0x2
564; GFX11-NEXT:    buffer_load_b64 v[1:2], v0, s[0:3], 0 offen offset:4
565; GFX11-NEXT:    buffer_load_b64 v[3:4], v0, s[0:3], 0 offen offset:12 glc
566; GFX11-NEXT:    buffer_load_b64 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
567; GFX11-NEXT:    s_waitcnt vmcnt(1)
568; GFX11-NEXT:    exp mrt0 v1, v2, v3, v4 done
569; GFX11-NEXT:    s_waitcnt vmcnt(0)
570; GFX11-NEXT:    exp mrt0 v5, v6, v0, v0 done
571; GFX11-NEXT:    s_endpgm
572main_body:
573  %a1 = add i32 %a, 4
574  %a2 = add i32 %a, 8
575  %a3 = add i32 %a, 12
576  %a4 = add i32 %a, 16
577  %a5 = add i32 %a, 28
578  %a6 = add i32 %a, 32
579  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
580  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
581  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 1)
582  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 1)
583  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 3)
584  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 3)
585  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
586  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
587  ret void
588}
589
; Merging of two-float loads with a variable base: two `.v2f32` loads at %a + 4
; and %a + 12 (adjacent 8-byte ranges). The checks expect them to be merged into
; a single four-dword load at `offset:4` using `offen`.
590define amdgpu_ps void @buffer_load_x2_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a) {
591; PREGFX10-LABEL: buffer_load_x2_offen_merged_and:
592; PREGFX10:       ; %bb.0: ; %main_body
593; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
594; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
595; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
596; PREGFX10-NEXT:    s_endpgm
597;
598; GFX10-LABEL: buffer_load_x2_offen_merged_and:
599; GFX10:       ; %bb.0: ; %main_body
600; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
601; GFX10-NEXT:    s_waitcnt vmcnt(0)
602; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
603; GFX10-NEXT:    s_endpgm
604;
605; GFX11-LABEL: buffer_load_x2_offen_merged_and:
606; GFX11:       ; %bb.0: ; %main_body
607; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4
608; GFX11-NEXT:    s_waitcnt vmcnt(0)
609; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
610; GFX11-NEXT:    s_endpgm
611main_body:
612  %a1 = add i32 %a, 4
613  %a2 = add i32 %a, 12
614  %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
615  %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
616  %r1 = extractelement <2 x float> %vr1, i32 0
617  %r2 = extractelement <2 x float> %vr1, i32 1
618  %r3 = extractelement <2 x float> %vr2, i32 0
619  %r4 = extractelement <2 x float> %vr2, i32 1
620  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
621  ret void
622}
623
; Merging of two-float loads whose offsets are formed with `or` rather than
; `add`. The base is `%inp << 4`, so its low four bits are zero and or-ing in 4
; or 12 yields the same value as adding. The checks expect the backend to treat
; the `or` as an add: the constants are folded into `offset:4` and the two
; `.v2f32` loads are merged into a single four-dword load.
; The offsets must be built with `or` (not `add`) for this test to differ from
; @buffer_load_x2_offen_merged_and; re-run update_llc_test_checks.py after
; touching the IR to confirm the expected output is unchanged.
624define amdgpu_ps void @buffer_load_x2_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp) {
625; PREGFX10-LABEL: buffer_load_x2_offen_merged_or:
626; PREGFX10:       ; %bb.0: ; %main_body
627; PREGFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
628; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
629; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
630; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
631; PREGFX10-NEXT:    s_endpgm
632;
633; GFX10-LABEL: buffer_load_x2_offen_merged_or:
634; GFX10:       ; %bb.0: ; %main_body
635; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
636; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
637; GFX10-NEXT:    s_waitcnt vmcnt(0)
638; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
639; GFX10-NEXT:    s_endpgm
640;
641; GFX11-LABEL: buffer_load_x2_offen_merged_or:
642; GFX11:       ; %bb.0: ; %main_body
643; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
644; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4
645; GFX11-NEXT:    s_waitcnt vmcnt(0)
646; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
647; GFX11-NEXT:    s_endpgm
648main_body:
649  %a = shl i32 %inp, 4
650  %a1 = or i32 %a, 4
651  %a2 = or i32 %a, 12
652  %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
653  %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
654  %r1 = extractelement <2 x float> %vr1, i32 0
655  %r2 = extractelement <2 x float> %vr1, i32 1
656  %r3 = extractelement <2 x float> %vr2, i32 0
657  %r4 = extractelement <2 x float> %vr2, i32 1
658  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
659  ret void
660}
661
; Six scalar float loads from %rsrc at constant offsets 4, 8, 12, 16, 28 and 32
; (last operand 0). The checks expect them to be combined into one four-dword
; load at offset:4 plus one two-dword load at offset:28, each feeding an export.
662define amdgpu_ps void @buffer_load_x1_offset_merged(ptr addrspace(8) inreg %rsrc) {
663; PREGFX10-LABEL: buffer_load_x1_offset_merged:
664; PREGFX10:       ; %bb.0: ; %main_body
665; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
666; PREGFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
667; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
668; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
669; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
670; PREGFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
671; PREGFX10-NEXT:    s_endpgm
672;
673; GFX10-LABEL: buffer_load_x1_offset_merged:
674; GFX10:       ; %bb.0: ; %main_body
675; GFX10-NEXT:    s_clause 0x1
676; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
677; GFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
678; GFX10-NEXT:    s_waitcnt vmcnt(1)
679; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
680; GFX10-NEXT:    s_waitcnt vmcnt(0)
681; GFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
682; GFX10-NEXT:    s_endpgm
683;
684; GFX11-LABEL: buffer_load_x1_offset_merged:
685; GFX11:       ; %bb.0: ; %main_body
686; GFX11-NEXT:    s_clause 0x1
687; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
688; GFX11-NEXT:    buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
689; GFX11-NEXT:    s_waitcnt vmcnt(1)
690; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
691; GFX11-NEXT:    s_waitcnt vmcnt(0)
692; GFX11-NEXT:    exp mrt0 v4, v5, v0, v0 done
693; GFX11-NEXT:    s_endpgm
694main_body:
695  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
696  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
697  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
698  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
699  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
700  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
701  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
702  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
703  ret void
704}
705
; Two <2 x float> loads from %rsrc at constant offsets 4 and 12. The checks
; expect a single four-dword load at offset:4 whose lanes feed one export.
706define amdgpu_ps void @buffer_load_x2_offset_merged(ptr addrspace(8) inreg %rsrc) {
707; PREGFX10-LABEL: buffer_load_x2_offset_merged:
708; PREGFX10:       ; %bb.0: ; %main_body
709; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
710; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
711; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
712; PREGFX10-NEXT:    s_endpgm
713;
714; GFX10-LABEL: buffer_load_x2_offset_merged:
715; GFX10:       ; %bb.0: ; %main_body
716; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
717; GFX10-NEXT:    s_waitcnt vmcnt(0)
718; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
719; GFX10-NEXT:    s_endpgm
720;
721; GFX11-LABEL: buffer_load_x2_offset_merged:
722; GFX11:       ; %bb.0: ; %main_body
723; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
724; GFX11-NEXT:    s_waitcnt vmcnt(0)
725; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
726; GFX11-NEXT:    s_endpgm
727main_body:
728  %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
729  %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
730  %r1 = extractelement <2 x float> %vr1, i32 0
731  %r2 = extractelement <2 x float> %vr1, i32 1
732  %r3 = extractelement <2 x float> %vr2, i32 0
733  %r4 = extractelement <2 x float> %vr2, i32 1
734  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
735  ret void
736}
737
; Integer-typed loads (<4 x i32>, <2 x i32>, i32) from offset 0 with the last
; operand set to 0, 1 and 2 respectively. The checks expect a plain load, a
; `glc` load and an `slc` load; results are bitcast to float for the return.
738define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(ptr addrspace(8) inreg) {
739; PREGFX10-LABEL: buffer_load_int:
740; PREGFX10:       ; %bb.0: ; %main_body
741; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
742; PREGFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
743; PREGFX10-NEXT:    buffer_load_dword v6, off, s[0:3], 0 slc
744; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
745; PREGFX10-NEXT:    ; return to shader part epilog
746;
747; GFX10-LABEL: buffer_load_int:
748; GFX10:       ; %bb.0: ; %main_body
749; GFX10-NEXT:    s_clause 0x2
750; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
751; GFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
752; GFX10-NEXT:    buffer_load_dword v6, off, s[0:3], 0 slc
753; GFX10-NEXT:    s_waitcnt vmcnt(0)
754; GFX10-NEXT:    ; return to shader part epilog
755;
756; GFX11-LABEL: buffer_load_int:
757; GFX11:       ; %bb.0: ; %main_body
758; GFX11-NEXT:    s_clause 0x2
759; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
760; GFX11-NEXT:    buffer_load_b64 v[4:5], off, s[0:3], 0 glc
761; GFX11-NEXT:    buffer_load_b32 v6, off, s[0:3], 0 slc
762; GFX11-NEXT:    s_waitcnt vmcnt(0)
763; GFX11-NEXT:    ; return to shader part epilog
764main_body:
765  %data = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0)
766  %data_glc = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %0, i32 0, i32 0, i32 1)
767  %data_slc = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %0, i32 0, i32 0, i32 2)
768  %fdata = bitcast <4 x i32> %data to <4 x float>
769  %fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
770  %fdata_slc = bitcast i32 %data_slc to float
771  %r0 = insertvalue {<4 x float>, <2 x float>, float} undef, <4 x float> %fdata, 0
772  %r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
773  %r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
774  ret {<4 x float>, <2 x float>, float} %r2
775}
776
; i8 load that is zero-extended to i32 and converted with uitofp. The checks
; expect an unsigned byte load followed by v_cvt_f32_ubyte0.
777define amdgpu_ps float @raw_ptr_buffer_load_ubyte(ptr addrspace(8) inreg %rsrc) {
778; PREGFX10-LABEL: raw_ptr_buffer_load_ubyte:
779; PREGFX10:       ; %bb.0: ; %main_body
780; PREGFX10-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
781; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
782; PREGFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
783; PREGFX10-NEXT:    ; return to shader part epilog
784;
785; GFX10-LABEL: raw_ptr_buffer_load_ubyte:
786; GFX10:       ; %bb.0: ; %main_body
787; GFX10-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
788; GFX10-NEXT:    s_waitcnt vmcnt(0)
789; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
790; GFX10-NEXT:    ; return to shader part epilog
791;
792; GFX11-LABEL: raw_ptr_buffer_load_ubyte:
793; GFX11:       ; %bb.0: ; %main_body
794; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
795; GFX11-NEXT:    s_waitcnt vmcnt(0)
796; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
797; GFX11-NEXT:    ; return to shader part epilog
798main_body:
799  %tmp = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
800  %tmp2 = zext i8 %tmp to i32
801  %val = uitofp i32 %tmp2 to float
802  ret float %val
803}
804
; i16 load that is zero-extended to i32 and converted with uitofp. The checks
; expect an unsigned 16-bit load followed by v_cvt_f32_u32.
805define amdgpu_ps float @raw_ptr_buffer_load_i16(ptr addrspace(8) inreg %rsrc) {
806; PREGFX10-LABEL: raw_ptr_buffer_load_i16:
807; PREGFX10:       ; %bb.0: ; %main_body
808; PREGFX10-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
809; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
810; PREGFX10-NEXT:    v_cvt_f32_u32_e32 v0, v0
811; PREGFX10-NEXT:    ; return to shader part epilog
812;
813; GFX10-LABEL: raw_ptr_buffer_load_i16:
814; GFX10:       ; %bb.0: ; %main_body
815; GFX10-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
816; GFX10-NEXT:    s_waitcnt vmcnt(0)
817; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, v0
818; GFX10-NEXT:    ; return to shader part epilog
819;
820; GFX11-LABEL: raw_ptr_buffer_load_i16:
821; GFX11:       ; %bb.0: ; %main_body
822; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
823; GFX11-NEXT:    s_waitcnt vmcnt(0)
824; GFX11-NEXT:    v_cvt_f32_u32_e32 v0, v0
825; GFX11-NEXT:    ; return to shader part epilog
826main_body:
827  %tmp = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
828  %tmp2 = zext i16 %tmp to i32
829  %val = uitofp i32 %tmp2 to float
830  ret float %val
831}
832
; i8 load that is sign-extended to i32 and converted with sitofp. The checks
; expect a signed byte load followed by v_cvt_f32_i32.
833define amdgpu_ps float @raw_ptr_buffer_load_sbyte(ptr addrspace(8) inreg %rsrc) {
834; PREGFX10-LABEL: raw_ptr_buffer_load_sbyte:
835; PREGFX10:       ; %bb.0: ; %main_body
836; PREGFX10-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0
837; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
838; PREGFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
839; PREGFX10-NEXT:    ; return to shader part epilog
840;
841; GFX10-LABEL: raw_ptr_buffer_load_sbyte:
842; GFX10:       ; %bb.0: ; %main_body
843; GFX10-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0
844; GFX10-NEXT:    s_waitcnt vmcnt(0)
845; GFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
846; GFX10-NEXT:    ; return to shader part epilog
847;
848; GFX11-LABEL: raw_ptr_buffer_load_sbyte:
849; GFX11:       ; %bb.0: ; %main_body
850; GFX11-NEXT:    buffer_load_i8 v0, off, s[0:3], 0
851; GFX11-NEXT:    s_waitcnt vmcnt(0)
852; GFX11-NEXT:    v_cvt_f32_i32_e32 v0, v0
853; GFX11-NEXT:    ; return to shader part epilog
854main_body:
855  %tmp = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
856  %tmp2 = sext i8 %tmp to i32
857  %val = sitofp i32 %tmp2 to float
858  ret float %val
859}
860
; i16 load that is sign-extended to i32 and converted with sitofp. The checks
; expect a signed 16-bit load followed by v_cvt_f32_i32.
861define amdgpu_ps float @raw_ptr_buffer_load_sshort(ptr addrspace(8) inreg %rsrc) {
862; PREGFX10-LABEL: raw_ptr_buffer_load_sshort:
863; PREGFX10:       ; %bb.0: ; %main_body
864; PREGFX10-NEXT:    buffer_load_sshort v0, off, s[0:3], 0
865; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
866; PREGFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
867; PREGFX10-NEXT:    ; return to shader part epilog
868;
869; GFX10-LABEL: raw_ptr_buffer_load_sshort:
870; GFX10:       ; %bb.0: ; %main_body
871; GFX10-NEXT:    buffer_load_sshort v0, off, s[0:3], 0
872; GFX10-NEXT:    s_waitcnt vmcnt(0)
873; GFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
874; GFX10-NEXT:    ; return to shader part epilog
875;
876; GFX11-LABEL: raw_ptr_buffer_load_sshort:
877; GFX11:       ; %bb.0: ; %main_body
878; GFX11-NEXT:    buffer_load_i16 v0, off, s[0:3], 0
879; GFX11-NEXT:    s_waitcnt vmcnt(0)
880; GFX11-NEXT:    v_cvt_f32_i32_e32 v0, v0
881; GFX11-NEXT:    ; return to shader part epilog
882main_body:
883  %tmp = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
884  %tmp2 = sext i16 %tmp to i32
885  %val = sitofp i32 %tmp2 to float
886  ret float %val
887}
888
; half load whose result is stored through the addrspace(3) pointer %ptr. The
; checks expect a 16-bit buffer load followed by a 16-bit DS write/store.
889define amdgpu_ps void @raw_ptr_buffer_load_f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
890; PREGFX10-LABEL: raw_ptr_buffer_load_f16:
891; PREGFX10:       ; %bb.0: ; %main_body
892; PREGFX10-NEXT:    buffer_load_ushort v1, off, s[0:3], 0
893; PREGFX10-NEXT:    s_mov_b32 m0, -1
894; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
895; PREGFX10-NEXT:    ds_write_b16 v0, v1
896; PREGFX10-NEXT:    s_endpgm
897;
898; GFX10-LABEL: raw_ptr_buffer_load_f16:
899; GFX10:       ; %bb.0: ; %main_body
900; GFX10-NEXT:    buffer_load_ushort v1, off, s[0:3], 0
901; GFX10-NEXT:    s_waitcnt vmcnt(0)
902; GFX10-NEXT:    ds_write_b16 v0, v1
903; GFX10-NEXT:    s_endpgm
904;
905; GFX11-LABEL: raw_ptr_buffer_load_f16:
906; GFX11:       ; %bb.0: ; %main_body
907; GFX11-NEXT:    buffer_load_u16 v1, off, s[0:3], 0
908; GFX11-NEXT:    s_waitcnt vmcnt(0)
909; GFX11-NEXT:    ds_store_b16 v0, v1
910; GFX11-NEXT:    s_endpgm
911main_body:
912  %val = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
913  store half %val, ptr addrspace(3) %ptr
914  ret void
915}
916
; <2 x half> load whose result is stored through the addrspace(3) pointer
; %ptr. The checks expect a single-dword buffer load and a 32-bit DS write/store.
917define amdgpu_ps void @raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
918; PREGFX10-LABEL: raw_ptr_buffer_load_v2f16:
919; PREGFX10:       ; %bb.0: ; %main_body
920; PREGFX10-NEXT:    buffer_load_dword v1, off, s[0:3], 0
921; PREGFX10-NEXT:    s_mov_b32 m0, -1
922; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
923; PREGFX10-NEXT:    ds_write_b32 v0, v1
924; PREGFX10-NEXT:    s_endpgm
925;
926; GFX10-LABEL: raw_ptr_buffer_load_v2f16:
927; GFX10:       ; %bb.0: ; %main_body
928; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], 0
929; GFX10-NEXT:    s_waitcnt vmcnt(0)
930; GFX10-NEXT:    ds_write_b32 v0, v1
931; GFX10-NEXT:    s_endpgm
932;
933; GFX11-LABEL: raw_ptr_buffer_load_v2f16:
934; GFX11:       ; %bb.0: ; %main_body
935; GFX11-NEXT:    buffer_load_b32 v1, off, s[0:3], 0
936; GFX11-NEXT:    s_waitcnt vmcnt(0)
937; GFX11-NEXT:    ds_store_b32 v0, v1
938; GFX11-NEXT:    s_endpgm
939main_body:
940  %val = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
941  store <2 x half> %val, ptr addrspace(3) %ptr
942  ret void
943}
944
; <4 x half> load whose result is stored through the addrspace(3) pointer
; %ptr. The checks expect a two-dword buffer load and a 64-bit DS write/store.
; The body has no named entry label, unlike most functions above.
945define amdgpu_ps void @raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
946; PREGFX10-LABEL: raw_ptr_buffer_load_v4f16:
947; PREGFX10:       ; %bb.0:
948; PREGFX10-NEXT:    buffer_load_dwordx2 v[1:2], off, s[0:3], 0
949; PREGFX10-NEXT:    s_mov_b32 m0, -1
950; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
951; PREGFX10-NEXT:    ds_write_b64 v0, v[1:2]
952; PREGFX10-NEXT:    s_endpgm
953;
954; GFX10-LABEL: raw_ptr_buffer_load_v4f16:
955; GFX10:       ; %bb.0:
956; GFX10-NEXT:    buffer_load_dwordx2 v[1:2], off, s[0:3], 0
957; GFX10-NEXT:    s_waitcnt vmcnt(0)
958; GFX10-NEXT:    ds_write_b64 v0, v[1:2]
959; GFX10-NEXT:    s_endpgm
960;
961; GFX11-LABEL: raw_ptr_buffer_load_v4f16:
962; GFX11:       ; %bb.0:
963; GFX11-NEXT:    buffer_load_b64 v[1:2], off, s[0:3], 0
964; GFX11-NEXT:    s_waitcnt vmcnt(0)
965; GFX11-NEXT:    ds_store_b64 v0, v[1:2]
966; GFX11-NEXT:    s_endpgm
967  %val = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
968  store <4 x half> %val, ptr addrspace(3) %ptr
969  ret void
970}
971
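972; FIXME: The <6 x half> case below is disabled (commented out); presumably this result type is not handled yet. Re-enable once it is.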
973; define amdgpu_ps void @raw_ptr_buffer_load_v6f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
974;   %val = call <6 x half> @llvm.amdgcn.raw.ptr.buffer.load.v6f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
975;   store <6 x half> %val, ptr addrspace(3) %ptr
976;   ret void
977; }
978
; <8 x half> load whose result is stored through the addrspace(3) pointer
; %ptr. The checks expect a four-dword buffer load and a 128-bit DS write/store.
; Only the gfx1010 and gfx1100 prefixes carry checks for this function.
; NOTE(review): presumably the verde and tonga outputs differ, so the update
; script dropped their shared prefix; confirm.
979define amdgpu_ps void @raw_ptr_buffer_load_v8f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
980; GFX10-LABEL: raw_ptr_buffer_load_v8f16:
981; GFX10:       ; %bb.0:
982; GFX10-NEXT:    buffer_load_dwordx4 v[1:4], off, s[0:3], 0
983; GFX10-NEXT:    s_waitcnt vmcnt(0)
984; GFX10-NEXT:    ds_write_b128 v0, v[1:4]
985; GFX10-NEXT:    s_endpgm
986;
987; GFX11-LABEL: raw_ptr_buffer_load_v8f16:
988; GFX11:       ; %bb.0:
989; GFX11-NEXT:    buffer_load_b128 v[1:4], off, s[0:3], 0
990; GFX11-NEXT:    s_waitcnt vmcnt(0)
991; GFX11-NEXT:    ds_store_b128 v0, v[1:4]
992; GFX11-NEXT:    s_endpgm
993  %val = call <8 x half> @llvm.amdgcn.raw.ptr.buffer.load.v8f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
994  store <8 x half> %val, ptr addrspace(3) %ptr
995  ret void
996}
997
; <2 x i16> load whose result is stored through the addrspace(3) pointer
; %ptr. The checks expect a single-dword buffer load and a 32-bit DS write/store.
998define amdgpu_ps void @raw_ptr_buffer_load_v2i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
999; PREGFX10-LABEL: raw_ptr_buffer_load_v2i16:
1000; PREGFX10:       ; %bb.0: ; %main_body
1001; PREGFX10-NEXT:    buffer_load_dword v1, off, s[0:3], 0
1002; PREGFX10-NEXT:    s_mov_b32 m0, -1
1003; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1004; PREGFX10-NEXT:    ds_write_b32 v0, v1
1005; PREGFX10-NEXT:    s_endpgm
1006;
1007; GFX10-LABEL: raw_ptr_buffer_load_v2i16:
1008; GFX10:       ; %bb.0: ; %main_body
1009; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], 0
1010; GFX10-NEXT:    s_waitcnt vmcnt(0)
1011; GFX10-NEXT:    ds_write_b32 v0, v1
1012; GFX10-NEXT:    s_endpgm
1013;
1014; GFX11-LABEL: raw_ptr_buffer_load_v2i16:
1015; GFX11:       ; %bb.0: ; %main_body
1016; GFX11-NEXT:    buffer_load_b32 v1, off, s[0:3], 0
1017; GFX11-NEXT:    s_waitcnt vmcnt(0)
1018; GFX11-NEXT:    ds_store_b32 v0, v1
1019; GFX11-NEXT:    s_endpgm
1020main_body:
1021  %val = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1022  store <2 x i16> %val, ptr addrspace(3) %ptr
1023  ret void
1024}
1025
; <4 x i16> load whose result is stored through the addrspace(3) pointer
; %ptr. The checks expect a two-dword buffer load and a 64-bit DS write/store.
1026define amdgpu_ps void @raw_ptr_buffer_load_v4i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1027; PREGFX10-LABEL: raw_ptr_buffer_load_v4i16:
1028; PREGFX10:       ; %bb.0:
1029; PREGFX10-NEXT:    buffer_load_dwordx2 v[1:2], off, s[0:3], 0
1030; PREGFX10-NEXT:    s_mov_b32 m0, -1
1031; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1032; PREGFX10-NEXT:    ds_write_b64 v0, v[1:2]
1033; PREGFX10-NEXT:    s_endpgm
1034;
1035; GFX10-LABEL: raw_ptr_buffer_load_v4i16:
1036; GFX10:       ; %bb.0:
1037; GFX10-NEXT:    buffer_load_dwordx2 v[1:2], off, s[0:3], 0
1038; GFX10-NEXT:    s_waitcnt vmcnt(0)
1039; GFX10-NEXT:    ds_write_b64 v0, v[1:2]
1040; GFX10-NEXT:    s_endpgm
1041;
1042; GFX11-LABEL: raw_ptr_buffer_load_v4i16:
1043; GFX11:       ; %bb.0:
1044; GFX11-NEXT:    buffer_load_b64 v[1:2], off, s[0:3], 0
1045; GFX11-NEXT:    s_waitcnt vmcnt(0)
1046; GFX11-NEXT:    ds_store_b64 v0, v[1:2]
1047; GFX11-NEXT:    s_endpgm
1048  %val = call <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1049  store <4 x i16> %val, ptr addrspace(3) %ptr
1050  ret void
1051}
1052
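1053; FIXME: The <6 x i16> case below is disabled (commented out); presumably this result type is not handled yet. Re-enable once it is.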
1054; define amdgpu_ps void @raw_ptr_buffer_load_v6i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1055;   %val = call <6 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v6i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1056;   store <6 x i16> %val, ptr addrspace(3) %ptr
1057;   ret void
1058; }
1059
; <8 x i16> load whose result is stored through the addrspace(3) pointer
; %ptr. The checks expect a four-dword buffer load and a 128-bit DS write/store.
; Only the gfx1010 and gfx1100 prefixes carry checks for this function.
; NOTE(review): presumably the verde and tonga outputs differ, so the update
; script dropped their shared prefix; confirm.
1060define amdgpu_ps void @raw_ptr_buffer_load_v8i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) {
1061; GFX10-LABEL: raw_ptr_buffer_load_v8i16:
1062; GFX10:       ; %bb.0:
1063; GFX10-NEXT:    buffer_load_dwordx4 v[1:4], off, s[0:3], 0
1064; GFX10-NEXT:    s_waitcnt vmcnt(0)
1065; GFX10-NEXT:    ds_write_b128 v0, v[1:4]
1066; GFX10-NEXT:    s_endpgm
1067;
1068; GFX11-LABEL: raw_ptr_buffer_load_v8i16:
1069; GFX11:       ; %bb.0:
1070; GFX11-NEXT:    buffer_load_b128 v[1:4], off, s[0:3], 0
1071; GFX11-NEXT:    s_waitcnt vmcnt(0)
1072; GFX11-NEXT:    ds_store_b128 v0, v[1:4]
1073; GFX11-NEXT:    s_endpgm
1074  %val = call <8 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v8i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
1075  store <8 x i16> %val, ptr addrspace(3) %ptr
1076  ret void
1077}
1078
; Six scalar float loads from %rsrc at constant offsets 4, 8, 12, 16, 28 and 32
; with the last operand 0. The checks expect one four-dword load at offset:4
; and one two-dword load at offset:28 instead of six separate loads.
1079define amdgpu_ps void @raw_ptr_buffer_load_x1_offset_merged(ptr addrspace(8) inreg %rsrc) {
1080; PREGFX10-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1081; PREGFX10:       ; %bb.0: ; %main_body
1082; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1083; PREGFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1084; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
1085; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
1086; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1087; PREGFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
1088; PREGFX10-NEXT:    s_endpgm
1089;
1090; GFX10-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1091; GFX10:       ; %bb.0: ; %main_body
1092; GFX10-NEXT:    s_clause 0x1
1093; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
1094; GFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
1095; GFX10-NEXT:    s_waitcnt vmcnt(1)
1096; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
1097; GFX10-NEXT:    s_waitcnt vmcnt(0)
1098; GFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
1099; GFX10-NEXT:    s_endpgm
1100;
1101; GFX11-LABEL: raw_ptr_buffer_load_x1_offset_merged:
1102; GFX11:       ; %bb.0: ; %main_body
1103; GFX11-NEXT:    s_clause 0x1
1104; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
1105; GFX11-NEXT:    buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
1106; GFX11-NEXT:    s_waitcnt vmcnt(1)
1107; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
1108; GFX11-NEXT:    s_waitcnt vmcnt(0)
1109; GFX11-NEXT:    exp mrt0 v4, v5, v0, v0 done
1110; GFX11-NEXT:    s_endpgm
1111main_body:
1112  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
1113  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
1114  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
1115  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
1116  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
1117  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
1118  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
1119  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
1120  ret void
1121}
1122
; Six scalar float loads at constant offsets 4, 8, 12, 16, 28 and 32, but every
; call passes 8 as the last operand (the function name calls this "swizzled").
; The checks expect six separate single-dword loads, i.e. no merging.
1123define amdgpu_ps void @raw_ptr_buffer_load_x1_offset_swizzled_not_merged(ptr addrspace(8) inreg %rsrc) {
1124; PREGFX10-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1125; PREGFX10:       ; %bb.0: ; %main_body
1126; PREGFX10-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4
1127; PREGFX10-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:8
1128; PREGFX10-NEXT:    buffer_load_dword v2, off, s[0:3], 0 offset:12
1129; PREGFX10-NEXT:    buffer_load_dword v3, off, s[0:3], 0 offset:16
1130; PREGFX10-NEXT:    buffer_load_dword v4, off, s[0:3], 0 offset:28
1131; PREGFX10-NEXT:    buffer_load_dword v5, off, s[0:3], 0 offset:32
1132; PREGFX10-NEXT:    s_waitcnt vmcnt(2)
1133; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
1134; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1135; PREGFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
1136; PREGFX10-NEXT:    s_endpgm
1137;
1138; GFX10-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1139; GFX10:       ; %bb.0: ; %main_body
1140; GFX10-NEXT:    s_clause 0x5
1141; GFX10-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4
1142; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:8
1143; GFX10-NEXT:    buffer_load_dword v2, off, s[0:3], 0 offset:12
1144; GFX10-NEXT:    buffer_load_dword v3, off, s[0:3], 0 offset:16
1145; GFX10-NEXT:    buffer_load_dword v4, off, s[0:3], 0 offset:28
1146; GFX10-NEXT:    buffer_load_dword v5, off, s[0:3], 0 offset:32
1147; GFX10-NEXT:    s_waitcnt vmcnt(2)
1148; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
1149; GFX10-NEXT:    s_waitcnt vmcnt(0)
1150; GFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
1151; GFX10-NEXT:    s_endpgm
1152;
1153; GFX11-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged:
1154; GFX11:       ; %bb.0: ; %main_body
1155; GFX11-NEXT:    s_clause 0x5
1156; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0 offset:4
1157; GFX11-NEXT:    buffer_load_b32 v1, off, s[0:3], 0 offset:8
1158; GFX11-NEXT:    buffer_load_b32 v2, off, s[0:3], 0 offset:12
1159; GFX11-NEXT:    buffer_load_b32 v3, off, s[0:3], 0 offset:16
1160; GFX11-NEXT:    buffer_load_b32 v4, off, s[0:3], 0 offset:28
1161; GFX11-NEXT:    buffer_load_b32 v5, off, s[0:3], 0 offset:32
1162; GFX11-NEXT:    s_waitcnt vmcnt(2)
1163; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
1164; GFX11-NEXT:    s_waitcnt vmcnt(0)
1165; GFX11-NEXT:    exp mrt0 v4, v5, v0, v0 done
1166; GFX11-NEXT:    s_endpgm
1167main_body:
1168  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 8)
1169  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 8)
1170  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 8)
1171  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 8)
1172  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 8)
1173  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 8)
1174  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
1175  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
1176  ret void
1177}
1178
; double load in a function without the amdgpu_ps calling convention. The
; offset operand is %voffset + 60; the checks expect a two-dword load that
; uses the register offset (`offen`) with the constant folded into offset:60.
1179define double @buffer_load_f64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1180; PREGFX10-LABEL: buffer_load_f64__voffset_add:
1181; PREGFX10:       ; %bb.0:
1182; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1184; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1185; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
1186;
1187; GFX10-LABEL: buffer_load_f64__voffset_add:
1188; GFX10:       ; %bb.0:
1189; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1190; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1191; GFX10-NEXT:    s_waitcnt vmcnt(0)
1192; GFX10-NEXT:    s_setpc_b64 s[30:31]
1193;
1194; GFX11-LABEL: buffer_load_f64__voffset_add:
1195; GFX11:       ; %bb.0:
1196; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1197; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1198; GFX11-NEXT:    s_waitcnt vmcnt(0)
1199; GFX11-NEXT:    s_setpc_b64 s[30:31]
1200  %voffset.add = add i32 %voffset, 60
1201  %data = call double @llvm.amdgcn.raw.ptr.buffer.load.f64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1202  ret double %data
1203}
1204
; <2 x double> load with the offset operand %voffset + 60. The checks expect a
; four-dword load using the register offset (`offen`) with the constant folded
; into offset:60.
1205define <2 x double> @buffer_load_v2f64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1206; PREGFX10-LABEL: buffer_load_v2f64__voffset_add:
1207; PREGFX10:       ; %bb.0:
1208; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1210; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1211; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
1212;
1213; GFX10-LABEL: buffer_load_v2f64__voffset_add:
1214; GFX10:       ; %bb.0:
1215; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1216; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1217; GFX10-NEXT:    s_waitcnt vmcnt(0)
1218; GFX10-NEXT:    s_setpc_b64 s[30:31]
1219;
1220; GFX11-LABEL: buffer_load_v2f64__voffset_add:
1221; GFX11:       ; %bb.0:
1222; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1223; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1224; GFX11-NEXT:    s_waitcnt vmcnt(0)
1225; GFX11-NEXT:    s_setpc_b64 s[30:31]
1226  %voffset.add = add i32 %voffset, 60
1227  %data = call <2 x double> @llvm.amdgcn.raw.ptr.buffer.load.v2f64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1228  ret <2 x double> %data
1229}
1230
; i64 load with the offset operand %voffset + 60. The checks expect a two-dword
; load using the register offset (`offen`) with the constant folded into
; offset:60.
1231define i64 @buffer_load_i64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1232; PREGFX10-LABEL: buffer_load_i64__voffset_add:
1233; PREGFX10:       ; %bb.0:
1234; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1235; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1236; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1237; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
1238;
1239; GFX10-LABEL: buffer_load_i64__voffset_add:
1240; GFX10:       ; %bb.0:
1241; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1242; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1243; GFX10-NEXT:    s_waitcnt vmcnt(0)
1244; GFX10-NEXT:    s_setpc_b64 s[30:31]
1245;
1246; GFX11-LABEL: buffer_load_i64__voffset_add:
1247; GFX11:       ; %bb.0:
1248; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1250; GFX11-NEXT:    s_waitcnt vmcnt(0)
1251; GFX11-NEXT:    s_setpc_b64 s[30:31]
1252  %voffset.add = add i32 %voffset, 60
1253  %data = call i64 @llvm.amdgcn.raw.ptr.buffer.load.i64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1254  ret i64 %data
1255}
1256
; <2 x i64> load with the offset operand %voffset + 60. The checks expect a
; four-dword load using the register offset (`offen`) with the constant folded
; into offset:60.
1257define <2 x i64> @buffer_load_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1258; PREGFX10-LABEL: buffer_load_v2i64__voffset_add:
1259; PREGFX10:       ; %bb.0:
1260; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1261; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1262; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1263; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
1264;
1265; GFX10-LABEL: buffer_load_v2i64__voffset_add:
1266; GFX10:       ; %bb.0:
1267; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1268; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1269; GFX10-NEXT:    s_waitcnt vmcnt(0)
1270; GFX10-NEXT:    s_setpc_b64 s[30:31]
1271;
1272; GFX11-LABEL: buffer_load_v2i64__voffset_add:
1273; GFX11:       ; %bb.0:
1274; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1276; GFX11-NEXT:    s_waitcnt vmcnt(0)
1277; GFX11-NEXT:    s_setpc_b64 s[30:31]
1278  %voffset.add = add i32 %voffset, 60
1279  %data = call <2 x i64> @llvm.amdgcn.raw.ptr.buffer.load.v2i64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1280  ret <2 x i64> %data
1281}
1282
; Load of a `ptr` (default address space) value with the offset operand
; %voffset + 60. The checks expect a two-dword load using the register offset
; (`offen`) with the constant folded into offset:60.
1283define ptr @buffer_load_p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1284; PREGFX10-LABEL: buffer_load_p0__voffset_add:
1285; PREGFX10:       ; %bb.0:
1286; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1288; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1289; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
1290;
1291; GFX10-LABEL: buffer_load_p0__voffset_add:
1292; GFX10:       ; %bb.0:
1293; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1295; GFX10-NEXT:    s_waitcnt vmcnt(0)
1296; GFX10-NEXT:    s_setpc_b64 s[30:31]
1297;
1298; GFX11-LABEL: buffer_load_p0__voffset_add:
1299; GFX11:       ; %bb.0:
1300; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1302; GFX11-NEXT:    s_waitcnt vmcnt(0)
1303; GFX11-NEXT:    s_setpc_b64 s[30:31]
1304  %voffset.add = add i32 %voffset, 60
1305  %data = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1306  ret ptr %data
1307}
1308
; Load of a <2 x ptr> value with the offset operand %voffset + 60. The checks
; expect a four-dword load using the register offset (`offen`) with the
; constant folded into offset:60. The intrinsic is overloaded on its result
; type, so the call uses the `.v2p0` suffix that matches <2 x ptr>.
1309define <2 x ptr> @buffer_load_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1310; PREGFX10-LABEL: buffer_load_v2p0__voffset_add:
1311; PREGFX10:       ; %bb.0:
1312; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1314; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1315; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
1316;
1317; GFX10-LABEL: buffer_load_v2p0__voffset_add:
1318; GFX10:       ; %bb.0:
1319; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1320; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
1321; GFX10-NEXT:    s_waitcnt vmcnt(0)
1322; GFX10-NEXT:    s_setpc_b64 s[30:31]
1323;
1324; GFX11-LABEL: buffer_load_v2p0__voffset_add:
1325; GFX11:       ; %bb.0:
1326; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1327; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
1328; GFX11-NEXT:    s_waitcnt vmcnt(0)
1329; GFX11-NEXT:    s_setpc_b64 s[30:31]
1330  %voffset.add = add i32 %voffset, 60
1331  %data = call <2 x ptr> @llvm.amdgcn.raw.ptr.buffer.load.v2p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1332  ret <2 x ptr> %data
1333}
1334
; Load of a `ptr addrspace(1)` value with the offset operand %voffset + 60.
; The checks expect a two-dword load using the register offset (`offen`) with
; the constant folded into offset:60.
1335define ptr addrspace(1) @buffer_load_p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
1336; PREGFX10-LABEL: buffer_load_p1__voffset_add:
1337; PREGFX10:       ; %bb.0:
1338; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1339; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1340; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
1341; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
1342;
1343; GFX10-LABEL: buffer_load_p1__voffset_add:
1344; GFX10:       ; %bb.0:
1345; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1346; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
1347; GFX10-NEXT:    s_waitcnt vmcnt(0)
1348; GFX10-NEXT:    s_setpc_b64 s[30:31]
1349;
1350; GFX11-LABEL: buffer_load_p1__voffset_add:
1351; GFX11:       ; %bb.0:
1352; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
1354; GFX11-NEXT:    s_waitcnt vmcnt(0)
1355; GFX11-NEXT:    s_setpc_b64 s[30:31]
1356  %voffset.add = add i32 %voffset, 60
1357  %data = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
1358  ret ptr addrspace(1) %data
1359}
1360
; <2 x ptr addrspace(1)> result. The assertions expect "add i32 %voffset, 60"
; to be folded into the immediate offset:60 field of a single 128-bit buffer
; load.
; The intrinsic suffix spells the overloaded return type, so the vector form is
; .v2p1 (the scalar .p1 suffix belongs to the plain ptr addrspace(1) overload),
; matching the naming already used by the v2p2 / v2p3 tests in this file.
define <2 x ptr addrspace(1)> @buffer_load_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v2p1__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v2p1__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v2p1__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  %data = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret <2 x ptr addrspace(1)> %data
}
1386
; Scalar ptr addrspace(4) result. The assertions expect the constant 60 added
; to %voffset to end up as the immediate offset:60 of one 64-bit buffer load
; (dwordx2 / b64) on every checked target.
define ptr addrspace(4) @buffer_load_p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_p4__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_p4__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_p4__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret ptr addrspace(4) %result
}
1412
; <2 x ptr addrspace(4)> result. The assertions expect "add i32 %voffset, 60"
; to be folded into the immediate offset:60 field of a single 128-bit buffer
; load.
; The intrinsic suffix spells the overloaded return type, so the vector form is
; .v2p4 (the scalar .p4 suffix belongs to the plain ptr addrspace(4) overload),
; matching the naming already used by the v2p2 / v2p3 tests in this file.
define <2 x ptr addrspace(4)> @buffer_load_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v2p4__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v2p4__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v2p4__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  %data = call <2 x ptr addrspace(4)> @llvm.amdgcn.raw.ptr.buffer.load.v2p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret <2 x ptr addrspace(4)> %data
}
1438
; Scalar ptr addrspace(999) result. The assertions expect the constant 60 added
; to %voffset to end up as the immediate offset:60 of one 64-bit buffer load
; (dwordx2 / b64) on every checked target.
define ptr addrspace(999) @buffer_load_p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_p999__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_p999__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_p999__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call ptr addrspace(999) @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret ptr addrspace(999) %result
}
1464
; <2 x ptr addrspace(999)> result. The assertions expect "add i32 %voffset, 60"
; to be folded into the immediate offset:60 field of a single 128-bit buffer
; load.
; The intrinsic suffix spells the overloaded return type, so the vector form is
; .v2p999 (the scalar .p999 suffix belongs to the plain ptr addrspace(999)
; overload), matching the naming already used by the v2p2 / v2p3 tests in this
; file.
define <2 x ptr addrspace(999)> @buffer_load_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v2p999__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v2p999__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v2p999__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  %data = call <2 x ptr addrspace(999)> @llvm.amdgcn.raw.ptr.buffer.load.v2p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret <2 x ptr addrspace(999)> %data
}
1490
; Scalar ptr addrspace(2) result. The assertions expect the constant 60 added
; to %voffset to end up as the immediate offset:60 of one 32-bit buffer load
; (dword / b32) on every checked target.
define ptr addrspace(2) @buffer_load_p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_p2__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_p2__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_p2__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call ptr addrspace(2) @llvm.amdgcn.raw.ptr.buffer.load.p2(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret ptr addrspace(2) %result
}
1516
; <2 x ptr addrspace(2)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 64-bit buffer load
; (dwordx2 / b64) on every checked target.
define <2 x ptr addrspace(2)> @buffer_load_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v2p2__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v2p2__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v2p2__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <2 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v2p2(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <2 x ptr addrspace(2)> %result
}
1542
; <3 x ptr addrspace(2)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 96-bit buffer load
; (dwordx3 / b96).
; Review note: unlike its neighbours, this function carries no assertions under
; the PREGFX10 prefix, so the verde and tonga invocations verify nothing here.
; Presumably those two targets emit different code and the update script
; dropped the shared prefix - confirm, and consider splitting the prefix.
define <3 x ptr addrspace(2)> @buffer_load_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; GFX10-LABEL: buffer_load_v3p2__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v3p2__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <3 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v3p2(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <3 x ptr addrspace(2)> %result
}
1561
; <4 x ptr addrspace(2)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 128-bit buffer load
; (dwordx4 / b128) on every checked target.
define <4 x ptr addrspace(2)> @buffer_load_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v4p2__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v4p2__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v4p2__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <4 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v4p2(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <4 x ptr addrspace(2)> %result
}
1587
; Scalar ptr addrspace(3) result. The assertions expect the constant 60 added
; to %voffset to end up as the immediate offset:60 of one 32-bit buffer load
; (dword / b32) on every checked target.
define ptr addrspace(3) @buffer_load_p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_p3__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_p3__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_p3__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call ptr addrspace(3) @llvm.amdgcn.raw.ptr.buffer.load.p3(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret ptr addrspace(3) %result
}
1613
; <2 x ptr addrspace(3)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 64-bit buffer load
; (dwordx2 / b64) on every checked target.
define <2 x ptr addrspace(3)> @buffer_load_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v2p3__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v2p3__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v2p3__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <2 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v2p3(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <2 x ptr addrspace(3)> %result
}
1639
; <3 x ptr addrspace(3)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 96-bit buffer load
; (dwordx3 / b96).
; Review note: unlike its neighbours, this function carries no assertions under
; the PREGFX10 prefix, so the verde and tonga invocations verify nothing here.
; Presumably those two targets emit different code and the update script
; dropped the shared prefix - confirm, and consider splitting the prefix.
define <3 x ptr addrspace(3)> @buffer_load_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; GFX10-LABEL: buffer_load_v3p3__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v3p3__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <3 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v3p3(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <3 x ptr addrspace(3)> %result
}
1658
; <4 x ptr addrspace(3)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 128-bit buffer load
; (dwordx4 / b128) on every checked target.
define <4 x ptr addrspace(3)> @buffer_load_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v4p3__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v4p3__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v4p3__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <4 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v4p3(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <4 x ptr addrspace(3)> %result
}
1684
; Scalar ptr addrspace(5) result. The assertions expect the constant 60 added
; to %voffset to end up as the immediate offset:60 of one 32-bit buffer load
; (dword / b32) on every checked target.
define ptr addrspace(5) @buffer_load_p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_p5__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_p5__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_p5__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret ptr addrspace(5) %result
}
1710
; <2 x ptr addrspace(5)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 64-bit buffer load
; (dwordx2 / b64) on every checked target.
define <2 x ptr addrspace(5)> @buffer_load_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v2p5__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v2p5__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v2p5__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <2 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v2p5(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <2 x ptr addrspace(5)> %result
}
1736
; <3 x ptr addrspace(5)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 96-bit buffer load
; (dwordx3 / b96).
; Review note: unlike its neighbours, this function carries no assertions under
; the PREGFX10 prefix, so the verde and tonga invocations verify nothing here.
; Presumably those two targets emit different code and the update script
; dropped the shared prefix - confirm, and consider splitting the prefix.
define <3 x ptr addrspace(5)> @buffer_load_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; GFX10-LABEL: buffer_load_v3p5__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v3p5__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <3 x ptr addrspace(5)> %result
}
1755
; <4 x ptr addrspace(5)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 128-bit buffer load
; (dwordx4 / b128) on every checked target.
define <4 x ptr addrspace(5)> @buffer_load_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v4p5__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v4p5__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v4p5__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <4 x ptr addrspace(5)> %result
}
1781
; Scalar ptr addrspace(6) result. The assertions expect the constant 60 added
; to %voffset to end up as the immediate offset:60 of one 32-bit buffer load
; (dword / b32) on every checked target.
define ptr addrspace(6) @buffer_load_p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_p6__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_p6__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dword v0, v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_p6__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call ptr addrspace(6) @llvm.amdgcn.raw.ptr.buffer.load.p6(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret ptr addrspace(6) %result
}
1807
; <2 x ptr addrspace(6)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 64-bit buffer load
; (dwordx2 / b64) on every checked target.
define <2 x ptr addrspace(6)> @buffer_load_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v2p6__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v2p6__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v2p6__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <2 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v2p6(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <2 x ptr addrspace(6)> %result
}
1833
; <3 x ptr addrspace(6)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 96-bit buffer load
; (dwordx3 / b96).
; Review note: unlike its neighbours, this function carries no assertions under
; the PREGFX10 prefix, so the verde and tonga invocations verify nothing here.
; Presumably those two targets emit different code and the update script
; dropped the shared prefix - confirm, and consider splitting the prefix.
define <3 x ptr addrspace(6)> @buffer_load_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; GFX10-LABEL: buffer_load_v3p6__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v3p6__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <3 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v3p6(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <3 x ptr addrspace(6)> %result
}
1852
; <4 x ptr addrspace(6)> result. The assertions expect the constant 60 added to
; %voffset to end up as the immediate offset:60 of one 128-bit buffer load
; (dwordx4 / b128) on every checked target.
define <4 x ptr addrspace(6)> @buffer_load_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
; PREGFX10-LABEL: buffer_load_v4p6__voffset_add:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: buffer_load_v4p6__voffset_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: buffer_load_v4p6__voffset_add:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %offset = add i32 %voffset, 60
  %result = call <4 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v4p6(ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret <4 x ptr addrspace(6)> %result
}
1878
; Explicit intrinsic declarations, grouped by element type. Every overload takes
; (ptr addrspace(8), i32, i32, i32). No declarations appear in this list for
; the pointer-typed overloads called by the tests above.

; 32-bit float results.
declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0
declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0

; 32-bit integer results.
declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32) #0
declare <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32) #0
declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #0

; 8-bit and 16-bit integer results.
declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #0
declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32) #0
declare <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32) #0
declare <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32) #0

; 16-bit float results.
declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #0
declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #0
declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #0

; Export intrinsic; it shares attribute group #0 with the loads.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0

; Attribute group referenced by every declaration above.
attributes #0 = { nounwind readonly }
1894