xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll (revision 75e528fdd9594ecb6fdb5d9e7bee1506f7e43be0)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GFX6 %s
3;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GFX8PLUS %s
4;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=GFX11 %s
5;RUN: llc < %s -mtriple=amdgcn -mattr=-enable-prt-strict-null -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=NOPRT %s
6
; Issues three struct-buffer format loads of <4 x float> from the same
; resource that differ only in the last (auxiliary) immediate operand, and
; returns all three results in one aggregate.
; In the expected output below, auxiliary value 0 produces no cache modifier,
; 1 produces "glc" and 2 produces "slc". The constant index 0 is materialised
; in a VGPR and used with "idxen". The gfx1100 runs additionally group the
; three loads under s_clause.
7define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
8; GFX6-LABEL: buffer_load:
9; GFX6:       ; %bb.0: ; %main_body
10; GFX6-NEXT:    v_mov_b32_e32 v8, 0
11; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
12; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
13; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
14; GFX6-NEXT:    s_waitcnt vmcnt(0)
15; GFX6-NEXT:    ; return to shader part epilog
16;
17; GFX8PLUS-LABEL: buffer_load:
18; GFX8PLUS:       ; %bb.0: ; %main_body
19; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
20; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
21; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
22; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
23; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
24; GFX8PLUS-NEXT:    ; return to shader part epilog
25;
26; GFX11-LABEL: buffer_load:
27; GFX11:       ; %bb.0: ; %main_body
28; GFX11-NEXT:    v_mov_b32_e32 v8, 0
29; GFX11-NEXT:    s_clause 0x2
30; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
31; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
32; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
33; GFX11-NEXT:    s_waitcnt vmcnt(0)
34; GFX11-NEXT:    ; return to shader part epilog
35;
36; NOPRT-LABEL: buffer_load:
37; NOPRT:       ; %bb.0: ; %main_body
38; NOPRT-NEXT:    v_mov_b32_e32 v8, 0
39; NOPRT-NEXT:    s_clause 0x2
40; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
41; NOPRT-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
42; NOPRT-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
43; NOPRT-NEXT:    s_waitcnt vmcnt(0)
44; NOPRT-NEXT:    ; return to shader part epilog
45main_body:
  ; Same resource, index, and offsets; only the trailing immediate changes (0, 1, 2).
46  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
47  %data_glc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
48  %data_slc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
  ; Every field of the aggregate is overwritten by the three insertvalues, so
  ; the seed is never observed; poison is used rather than the deprecated undef.
49  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %data, 0
50  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
51  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
52  ret {<4 x float>, <4 x float>, <4 x float>} %r2
53}
54
; Passes the constant 42 as the third intrinsic operand with a zero index.
; The expected output carries it in the instruction's immediate field
; ("offset:42") and only materialises the zero index in v0.
55define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
56; GFX6-LABEL: buffer_load_immoffs:
57; GFX6:       ; %bb.0: ; %main_body
58; GFX6-NEXT:    v_mov_b32_e32 v0, 0
59; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
60; GFX6-NEXT:    s_waitcnt vmcnt(0)
61; GFX6-NEXT:    ; return to shader part epilog
62;
63; GFX8PLUS-LABEL: buffer_load_immoffs:
64; GFX8PLUS:       ; %bb.0: ; %main_body
65; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
66; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
67; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
68; GFX8PLUS-NEXT:    ; return to shader part epilog
69;
70; GFX11-LABEL: buffer_load_immoffs:
71; GFX11:       ; %bb.0: ; %main_body
72; GFX11-NEXT:    v_mov_b32_e32 v0, 0
73; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
74; GFX11-NEXT:    s_waitcnt vmcnt(0)
75; GFX11-NEXT:    ; return to shader part epilog
76;
77; NOPRT-LABEL: buffer_load_immoffs:
78; NOPRT:       ; %bb.0: ; %main_body
79; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
80; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
81; NOPRT-NEXT:    s_waitcnt vmcnt(0)
82; NOPRT-NEXT:    ; return to shader part epilog
83main_body:
  ; index = 0, third operand = 42, fourth operand = 0, auxiliary = 0
84  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 42, i32 0, i32 0)
85  ret <4 x float> %data
86}
87
; Three loads combining a constant third operand with a non-zero constant
; fourth operand: (4092, 60), (4092, 32764) and (4, 36860). The results are
; summed with two vector fadds.
; In the expected output the third operand always lands in the immediate
; "offset" field. The fourth operand is used inline when it is 60, and is
; first moved into s4 when it is 32764 (0x7ffc) or 36860 (0x8ffc).
88define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
89; GFX6-LABEL: buffer_load_immoffs_large:
90; GFX6:       ; %bb.0: ; %main_body
91; GFX6-NEXT:    v_mov_b32_e32 v8, 0
92; GFX6-NEXT:    s_movk_i32 s4, 0x7ffc
93; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
94; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
95; GFX6-NEXT:    s_mov_b32 s4, 0x8ffc
96; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
97; GFX6-NEXT:    s_waitcnt vmcnt(1)
98; GFX6-NEXT:    v_add_f32_e32 v3, v3, v7
99; GFX6-NEXT:    v_add_f32_e32 v2, v2, v6
100; GFX6-NEXT:    v_add_f32_e32 v1, v1, v5
101; GFX6-NEXT:    v_add_f32_e32 v0, v0, v4
102; GFX6-NEXT:    s_waitcnt vmcnt(0)
103; GFX6-NEXT:    v_add_f32_e32 v0, v8, v0
104; GFX6-NEXT:    v_add_f32_e32 v1, v9, v1
105; GFX6-NEXT:    v_add_f32_e32 v2, v10, v2
106; GFX6-NEXT:    v_add_f32_e32 v3, v11, v3
107; GFX6-NEXT:    ; return to shader part epilog
108;
109; GFX8PLUS-LABEL: buffer_load_immoffs_large:
110; GFX8PLUS:       ; %bb.0: ; %main_body
111; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
112; GFX8PLUS-NEXT:    s_movk_i32 s4, 0x7ffc
113; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
114; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
115; GFX8PLUS-NEXT:    s_mov_b32 s4, 0x8ffc
116; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
117; GFX8PLUS-NEXT:    s_waitcnt vmcnt(1)
118; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v3, v7
119; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v2, v6
120; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v1, v5
121; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v0, v4
122; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
123; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v8, v0
124; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v9, v1
125; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v10, v2
126; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v11, v3
127; GFX8PLUS-NEXT:    ; return to shader part epilog
128;
129; GFX11-LABEL: buffer_load_immoffs_large:
130; GFX11:       ; %bb.0: ; %main_body
131; GFX11-NEXT:    v_mov_b32_e32 v8, 0
132; GFX11-NEXT:    s_movk_i32 s4, 0x7ffc
133; GFX11-NEXT:    s_clause 0x1
134; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
135; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
136; GFX11-NEXT:    s_mov_b32 s4, 0x8ffc
137; GFX11-NEXT:    s_waitcnt vmcnt(0)
138; GFX11-NEXT:    v_add_f32_e32 v1, v1, v5
139; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
140; GFX11-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
141; GFX11-NEXT:    s_waitcnt vmcnt(0)
142; GFX11-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
143; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
144; GFX11-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
145; GFX11-NEXT:    v_add_f32_e32 v2, v10, v2
146; GFX11-NEXT:    ; return to shader part epilog
147;
148; NOPRT-LABEL: buffer_load_immoffs_large:
149; NOPRT:       ; %bb.0: ; %main_body
150; NOPRT-NEXT:    v_mov_b32_e32 v8, 0
151; NOPRT-NEXT:    s_movk_i32 s4, 0x7ffc
152; NOPRT-NEXT:    s_clause 0x1
153; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
154; NOPRT-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
155; NOPRT-NEXT:    s_mov_b32 s4, 0x8ffc
156; NOPRT-NEXT:    s_waitcnt vmcnt(0)
157; NOPRT-NEXT:    v_add_f32_e32 v1, v1, v5
158; NOPRT-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
159; NOPRT-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
160; NOPRT-NEXT:    s_waitcnt vmcnt(0)
161; NOPRT-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
162; NOPRT-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
163; NOPRT-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
164; NOPRT-NEXT:    v_add_f32_e32 v2, v10, v2
165; NOPRT-NEXT:    ; return to shader part epilog
166main_body:
  ; (third, fourth) operands: (4092, 60), (4092, 32764 = 0x7ffc), (4, 36860 = 0x8ffc)
167  %d.0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 60, i32 0)
168  %d.1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 32764, i32 0)
169  %d.2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4, i32 36860, i32 0)
  ; The fadds keep all three loads live so none of them can be dropped.
170  %d.3 = fadd <4 x float> %d.0, %d.1
171  %data = fadd <4 x float> %d.2, %d.3
172  ret <4 x float> %data
173}
174
; Constant third operand 4092 with a zero index. The expected output places
; the whole value in the immediate field ("offset:4092"); no offset VGPR and
; no "offen" modifier are needed.
175define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
176; GFX6-LABEL: buffer_load_voffset_large_12bit:
177; GFX6:       ; %bb.0: ; %main_body
178; GFX6-NEXT:    v_mov_b32_e32 v0, 0
179; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
180; GFX6-NEXT:    s_waitcnt vmcnt(0)
181; GFX6-NEXT:    ; return to shader part epilog
182;
183; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
184; GFX8PLUS:       ; %bb.0: ; %main_body
185; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
186; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
187; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
188; GFX8PLUS-NEXT:    ; return to shader part epilog
189;
190; GFX11-LABEL: buffer_load_voffset_large_12bit:
191; GFX11:       ; %bb.0: ; %main_body
192; GFX11-NEXT:    v_mov_b32_e32 v0, 0
193; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
194; GFX11-NEXT:    s_waitcnt vmcnt(0)
195; GFX11-NEXT:    ; return to shader part epilog
196;
197; NOPRT-LABEL: buffer_load_voffset_large_12bit:
198; NOPRT:       ; %bb.0: ; %main_body
199; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
200; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
201; NOPRT-NEXT:    s_waitcnt vmcnt(0)
202; NOPRT-NEXT:    ; return to shader part epilog
203main_body:
  ; third operand = 4092
204  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 0, i32 0)
205  ret <4 x float> %data
206}
207
; Constant third operand 8188 (= 0x1000 + 4092) with a zero index.
; The expected output splits the constant: 0x1000 is moved into the offset
; VGPR (v1) and 4092 stays in the immediate field, so the load uses both
; "idxen" and "offen" with the register pair v[0:1] (v0 = 0 via s4).
208define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
209; GFX6-LABEL: buffer_load_voffset_large_13bit:
210; GFX6:       ; %bb.0: ; %main_body
211; GFX6-NEXT:    s_mov_b32 s4, 0
212; GFX6-NEXT:    v_mov_b32_e32 v1, 0x1000
213; GFX6-NEXT:    v_mov_b32_e32 v0, s4
214; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
215; GFX6-NEXT:    s_waitcnt vmcnt(0)
216; GFX6-NEXT:    ; return to shader part epilog
217;
218; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
219; GFX8PLUS:       ; %bb.0: ; %main_body
220; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
221; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x1000
222; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
223; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
224; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
225; GFX8PLUS-NEXT:    ; return to shader part epilog
226;
227; GFX11-LABEL: buffer_load_voffset_large_13bit:
228; GFX11:       ; %bb.0: ; %main_body
229; GFX11-NEXT:    s_mov_b32 s4, 0
230; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
231; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
232; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
233; GFX11-NEXT:    s_waitcnt vmcnt(0)
234; GFX11-NEXT:    ; return to shader part epilog
235;
236; NOPRT-LABEL: buffer_load_voffset_large_13bit:
237; NOPRT:       ; %bb.0: ; %main_body
238; NOPRT-NEXT:    s_mov_b32 s4, 0
239; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
240; NOPRT-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
241; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
242; NOPRT-NEXT:    s_waitcnt vmcnt(0)
243; NOPRT-NEXT:    ; return to shader part epilog
244main_body:
  ; third operand = 8188 = 0x1000 + 4092
245  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8188, i32 0, i32 0)
246  ret <4 x float> %data
247}
248
; Constant third operand 65532 (= 0xf000 + 4092) with a zero index.
; The expected output moves 0xf000 into the offset VGPR (v1) and keeps 4092
; in the immediate field, using "idxen offen" with the pair v[0:1].
249define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
250; GFX6-LABEL: buffer_load_voffset_large_16bit:
251; GFX6:       ; %bb.0: ; %main_body
252; GFX6-NEXT:    s_mov_b32 s4, 0
253; GFX6-NEXT:    v_mov_b32_e32 v1, 0xf000
254; GFX6-NEXT:    v_mov_b32_e32 v0, s4
255; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
256; GFX6-NEXT:    s_waitcnt vmcnt(0)
257; GFX6-NEXT:    ; return to shader part epilog
258;
259; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
260; GFX8PLUS:       ; %bb.0: ; %main_body
261; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
262; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xf000
263; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
264; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
265; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
266; GFX8PLUS-NEXT:    ; return to shader part epilog
267;
268; GFX11-LABEL: buffer_load_voffset_large_16bit:
269; GFX11:       ; %bb.0: ; %main_body
270; GFX11-NEXT:    s_mov_b32 s4, 0
271; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
272; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
273; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
274; GFX11-NEXT:    s_waitcnt vmcnt(0)
275; GFX11-NEXT:    ; return to shader part epilog
276;
277; NOPRT-LABEL: buffer_load_voffset_large_16bit:
278; NOPRT:       ; %bb.0: ; %main_body
279; NOPRT-NEXT:    s_mov_b32 s4, 0
280; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
281; NOPRT-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
282; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
283; NOPRT-NEXT:    s_waitcnt vmcnt(0)
284; NOPRT-NEXT:    ; return to shader part epilog
285main_body:
  ; third operand = 65532 = 0xf000 + 4092
286  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 65532, i32 0, i32 0)
287  ret <4 x float> %data
288}
289
; Constant third operand 8388604 (= 0x7ff000 + 4092) with a zero index.
; The expected output moves 0x7ff000 into the offset VGPR (v1) and keeps 4092
; in the immediate field, using "idxen offen" with the pair v[0:1].
290define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
291; GFX6-LABEL: buffer_load_voffset_large_23bit:
292; GFX6:       ; %bb.0: ; %main_body
293; GFX6-NEXT:    s_mov_b32 s4, 0
294; GFX6-NEXT:    v_mov_b32_e32 v1, 0x7ff000
295; GFX6-NEXT:    v_mov_b32_e32 v0, s4
296; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
297; GFX6-NEXT:    s_waitcnt vmcnt(0)
298; GFX6-NEXT:    ; return to shader part epilog
299;
300; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
301; GFX8PLUS:       ; %bb.0: ; %main_body
302; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
303; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x7ff000
304; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
305; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
306; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
307; GFX8PLUS-NEXT:    ; return to shader part epilog
308;
309; GFX11-LABEL: buffer_load_voffset_large_23bit:
310; GFX11:       ; %bb.0: ; %main_body
311; GFX11-NEXT:    s_mov_b32 s4, 0
312; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
313; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
314; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
315; GFX11-NEXT:    s_waitcnt vmcnt(0)
316; GFX11-NEXT:    ; return to shader part epilog
317;
318; NOPRT-LABEL: buffer_load_voffset_large_23bit:
319; NOPRT:       ; %bb.0: ; %main_body
320; NOPRT-NEXT:    s_mov_b32 s4, 0
321; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
322; NOPRT-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
323; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
324; NOPRT-NEXT:    s_waitcnt vmcnt(0)
325; NOPRT-NEXT:    ; return to shader part epilog
326main_body:
  ; third operand = 8388604 = 0x7ff000 + 4092
327  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8388604, i32 0, i32 0)
328  ret <4 x float> %data
329}
330
; Constant third operand 16777212 (= 0xfff000 + 4092) with a zero index.
; The expected output moves 0xfff000 into the offset VGPR (v1) and keeps 4092
; in the immediate field, using "idxen offen" with the pair v[0:1].
331define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
332; GFX6-LABEL: buffer_load_voffset_large_24bit:
333; GFX6:       ; %bb.0: ; %main_body
334; GFX6-NEXT:    s_mov_b32 s4, 0
335; GFX6-NEXT:    v_mov_b32_e32 v1, 0xfff000
336; GFX6-NEXT:    v_mov_b32_e32 v0, s4
337; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
338; GFX6-NEXT:    s_waitcnt vmcnt(0)
339; GFX6-NEXT:    ; return to shader part epilog
340;
341; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
342; GFX8PLUS:       ; %bb.0: ; %main_body
343; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
344; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xfff000
345; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
346; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
347; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
348; GFX8PLUS-NEXT:    ; return to shader part epilog
349;
350; GFX11-LABEL: buffer_load_voffset_large_24bit:
351; GFX11:       ; %bb.0: ; %main_body
352; GFX11-NEXT:    s_mov_b32 s4, 0
353; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
354; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
355; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
356; GFX11-NEXT:    s_waitcnt vmcnt(0)
357; GFX11-NEXT:    ; return to shader part epilog
358;
359; NOPRT-LABEL: buffer_load_voffset_large_24bit:
360; NOPRT:       ; %bb.0: ; %main_body
361; NOPRT-NEXT:    s_mov_b32 s4, 0
362; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
363; NOPRT-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
364; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
365; NOPRT-NEXT:    s_waitcnt vmcnt(0)
366; NOPRT-NEXT:    ; return to shader part epilog
367main_body:
  ; third operand = 16777212 = 0xfff000 + 4092
368  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 16777212, i32 0, i32 0)
369  ret <4 x float> %data
370}
371
; Uses the function's i32 argument as the second (index) intrinsic operand,
; with every other offset zero. The expected output feeds the incoming v0
; straight into the load as the "idxen" register, with no setup instructions.
372define amdgpu_ps <4 x float> @buffer_load_idx(ptr addrspace(8) inreg, i32) {
373; GFX6-LABEL: buffer_load_idx:
374; GFX6:       ; %bb.0: ; %main_body
375; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
376; GFX6-NEXT:    s_waitcnt vmcnt(0)
377; GFX6-NEXT:    ; return to shader part epilog
378;
379; GFX8PLUS-LABEL: buffer_load_idx:
380; GFX8PLUS:       ; %bb.0: ; %main_body
381; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
382; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
383; GFX8PLUS-NEXT:    ; return to shader part epilog
384;
385; GFX11-LABEL: buffer_load_idx:
386; GFX11:       ; %bb.0: ; %main_body
387; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
388; GFX11-NEXT:    s_waitcnt vmcnt(0)
389; GFX11-NEXT:    ; return to shader part epilog
390;
391; NOPRT-LABEL: buffer_load_idx:
392; NOPRT:       ; %bb.0: ; %main_body
393; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
394; NOPRT-NEXT:    s_waitcnt vmcnt(0)
395; NOPRT-NEXT:    ; return to shader part epilog
396main_body:
  ; %1 is the index operand; remaining offsets and the auxiliary operand are 0
397  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0, i32 0)
398  ret <4 x float> %data
399}
400
; Uses the function's i32 argument as the third (offset) intrinsic operand
; while the index is the constant 0. The expected output builds the register
; pair v[0:1] (v0 = 0 via s4, v1 = incoming v0) and issues the load with
; "idxen offen".
401define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
402; GFX6-LABEL: buffer_load_ofs:
403; GFX6:       ; %bb.0: ; %main_body
404; GFX6-NEXT:    s_mov_b32 s4, 0
405; GFX6-NEXT:    v_mov_b32_e32 v1, v0
406; GFX6-NEXT:    v_mov_b32_e32 v0, s4
407; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
408; GFX6-NEXT:    s_waitcnt vmcnt(0)
409; GFX6-NEXT:    ; return to shader part epilog
410;
411; GFX8PLUS-LABEL: buffer_load_ofs:
412; GFX8PLUS:       ; %bb.0: ; %main_body
413; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
414; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
415; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
416; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
417; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
418; GFX8PLUS-NEXT:    ; return to shader part epilog
419;
420; GFX11-LABEL: buffer_load_ofs:
421; GFX11:       ; %bb.0: ; %main_body
422; GFX11-NEXT:    s_mov_b32 s4, 0
423; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
424; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
425; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
426; GFX11-NEXT:    s_waitcnt vmcnt(0)
427; GFX11-NEXT:    ; return to shader part epilog
428;
429; NOPRT-LABEL: buffer_load_ofs:
430; NOPRT:       ; %bb.0: ; %main_body
431; NOPRT-NEXT:    s_mov_b32 s4, 0
432; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
433; NOPRT-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
434; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
435; NOPRT-NEXT:    s_waitcnt vmcnt(0)
436; NOPRT-NEXT:    ; return to shader part epilog
437main_body:
  ; index = 0, %1 is the offset operand
438  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %1, i32 0, i32 0)
439  ret <4 x float> %data
440}
441
; Offset operand is (argument + 60) with a zero index. The expected output
; contains no separate add: the variable part goes into v1 and the constant
; 60 is folded into the immediate field ("offset:60").
442define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
443; GFX6-LABEL: buffer_load_ofs_imm:
444; GFX6:       ; %bb.0: ; %main_body
445; GFX6-NEXT:    s_mov_b32 s4, 0
446; GFX6-NEXT:    v_mov_b32_e32 v1, v0
447; GFX6-NEXT:    v_mov_b32_e32 v0, s4
448; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
449; GFX6-NEXT:    s_waitcnt vmcnt(0)
450; GFX6-NEXT:    ; return to shader part epilog
451;
452; GFX8PLUS-LABEL: buffer_load_ofs_imm:
453; GFX8PLUS:       ; %bb.0: ; %main_body
454; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
455; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
456; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
457; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
458; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
459; GFX8PLUS-NEXT:    ; return to shader part epilog
460;
461; GFX11-LABEL: buffer_load_ofs_imm:
462; GFX11:       ; %bb.0: ; %main_body
463; GFX11-NEXT:    s_mov_b32 s4, 0
464; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
465; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
466; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
467; GFX11-NEXT:    s_waitcnt vmcnt(0)
468; GFX11-NEXT:    ; return to shader part epilog
469;
470; NOPRT-LABEL: buffer_load_ofs_imm:
471; NOPRT:       ; %bb.0: ; %main_body
472; NOPRT-NEXT:    s_mov_b32 s4, 0
473; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
474; NOPRT-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
475; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
476; NOPRT-NEXT:    s_waitcnt vmcnt(0)
477; NOPRT-NEXT:    ; return to shader part epilog
478main_body:
  ; variable offset plus a small constant; the constant should end up as offset:60
479  %ofs = add i32 %1, 60
480  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
481  ret <4 x float> %data
482}
483
; Both the index (%1) and the offset (%2) come from function arguments, in
; argument order. The expected output uses the incoming pair v[0:1] directly
; with "idxen offen" and needs no register shuffling.
484define amdgpu_ps <4 x float> @buffer_load_both(ptr addrspace(8) inreg, i32, i32) {
485; GFX6-LABEL: buffer_load_both:
486; GFX6:       ; %bb.0: ; %main_body
487; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
488; GFX6-NEXT:    s_waitcnt vmcnt(0)
489; GFX6-NEXT:    ; return to shader part epilog
490;
491; GFX8PLUS-LABEL: buffer_load_both:
492; GFX8PLUS:       ; %bb.0: ; %main_body
493; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
494; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
495; GFX8PLUS-NEXT:    ; return to shader part epilog
496;
497; GFX11-LABEL: buffer_load_both:
498; GFX11:       ; %bb.0: ; %main_body
499; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
500; GFX11-NEXT:    s_waitcnt vmcnt(0)
501; GFX11-NEXT:    ; return to shader part epilog
502;
503; NOPRT-LABEL: buffer_load_both:
504; NOPRT:       ; %bb.0: ; %main_body
505; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
506; NOPRT-NEXT:    s_waitcnt vmcnt(0)
507; NOPRT-NEXT:    ; return to shader part epilog
508main_body:
  ; index = %1, offset = %2
509  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 %2, i32 0, i32 0)
510  ret <4 x float> %data
511}
512
; Same as buffer_load_both but with the arguments swapped: %2 is the index
; and %1 is the offset. The expected output copies incoming v0 into v2 so
; that v[1:2] forms the (index, offset) register pair for "idxen offen".
513define amdgpu_ps <4 x float> @buffer_load_both_reversed(ptr addrspace(8) inreg, i32, i32) {
514; GFX6-LABEL: buffer_load_both_reversed:
515; GFX6:       ; %bb.0: ; %main_body
516; GFX6-NEXT:    v_mov_b32_e32 v2, v0
517; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
518; GFX6-NEXT:    s_waitcnt vmcnt(0)
519; GFX6-NEXT:    ; return to shader part epilog
520;
521; GFX8PLUS-LABEL: buffer_load_both_reversed:
522; GFX8PLUS:       ; %bb.0: ; %main_body
523; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, v0
524; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
525; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
526; GFX8PLUS-NEXT:    ; return to shader part epilog
527;
528; GFX11-LABEL: buffer_load_both_reversed:
529; GFX11:       ; %bb.0: ; %main_body
530; GFX11-NEXT:    v_mov_b32_e32 v2, v0
531; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
532; GFX11-NEXT:    s_waitcnt vmcnt(0)
533; GFX11-NEXT:    ; return to shader part epilog
534;
535; NOPRT-LABEL: buffer_load_both_reversed:
536; NOPRT:       ; %bb.0: ; %main_body
537; NOPRT-NEXT:    v_mov_b32_e32 v2, v0
538; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
539; NOPRT-NEXT:    s_waitcnt vmcnt(0)
540; NOPRT-NEXT:    ; return to shader part epilog
541main_body:
  ; index = %2, offset = %1 (swapped relative to argument order)
542  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %2, i32 %1, i32 0, i32 0)
543  ret <4 x float> %data
544}
545
; Scalar float overload of the intrinsic (.f32) with all index/offset
; operands zero. The expected output is the single-component instruction
; buffer_load_format_x writing one VGPR.
546define amdgpu_ps float @buffer_load_x(ptr addrspace(8) inreg %rsrc) {
547; GFX6-LABEL: buffer_load_x:
548; GFX6:       ; %bb.0: ; %main_body
549; GFX6-NEXT:    v_mov_b32_e32 v0, 0
550; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
551; GFX6-NEXT:    s_waitcnt vmcnt(0)
552; GFX6-NEXT:    ; return to shader part epilog
553;
554; GFX8PLUS-LABEL: buffer_load_x:
555; GFX8PLUS:       ; %bb.0: ; %main_body
556; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
557; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
558; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
559; GFX8PLUS-NEXT:    ; return to shader part epilog
560;
561; GFX11-LABEL: buffer_load_x:
562; GFX11:       ; %bb.0: ; %main_body
563; GFX11-NEXT:    v_mov_b32_e32 v0, 0
564; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
565; GFX11-NEXT:    s_waitcnt vmcnt(0)
566; GFX11-NEXT:    ; return to shader part epilog
567;
568; NOPRT-LABEL: buffer_load_x:
569; NOPRT:       ; %bb.0: ; %main_body
570; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
571; NOPRT-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
572; NOPRT-NEXT:    s_waitcnt vmcnt(0)
573; NOPRT-NEXT:    ; return to shader part epilog
574main_body:
  ; f32 result type selects the one-component form of the load
575  %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
576  ret float %data
577}
578
; Scalar integer overload of the intrinsic (.i32); the result is bitcast to
; float only so it can be returned. The expected output is identical to the
; float case: a single buffer_load_format_x with no extra conversion.
579define amdgpu_ps float @buffer_load_x_i32(ptr addrspace(8) inreg %rsrc) {
580; GFX6-LABEL: buffer_load_x_i32:
581; GFX6:       ; %bb.0: ; %main_body
582; GFX6-NEXT:    v_mov_b32_e32 v0, 0
583; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
584; GFX6-NEXT:    s_waitcnt vmcnt(0)
585; GFX6-NEXT:    ; return to shader part epilog
586;
587; GFX8PLUS-LABEL: buffer_load_x_i32:
588; GFX8PLUS:       ; %bb.0: ; %main_body
589; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
590; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
591; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
592; GFX8PLUS-NEXT:    ; return to shader part epilog
593;
594; GFX11-LABEL: buffer_load_x_i32:
595; GFX11:       ; %bb.0: ; %main_body
596; GFX11-NEXT:    v_mov_b32_e32 v0, 0
597; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
598; GFX11-NEXT:    s_waitcnt vmcnt(0)
599; GFX11-NEXT:    ; return to shader part epilog
600;
601; NOPRT-LABEL: buffer_load_x_i32:
602; NOPRT:       ; %bb.0: ; %main_body
603; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
604; NOPRT-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
605; NOPRT-NEXT:    s_waitcnt vmcnt(0)
606; NOPRT-NEXT:    ; return to shader part epilog
607main_body:
  ; i32 result type; the bitcast below is a no-op at the register level
608  %data = call i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
609  %fdata = bitcast i32 %data to float
610  ret float %fdata
611}
612
; Two-element float overload of the intrinsic (.v2f32) with all index/offset
; operands zero. The expected output is buffer_load_format_xy writing the
; register pair v[0:1].
613define amdgpu_ps <2 x float> @buffer_load_xy(ptr addrspace(8) inreg %rsrc) {
614; GFX6-LABEL: buffer_load_xy:
615; GFX6:       ; %bb.0: ; %main_body
616; GFX6-NEXT:    v_mov_b32_e32 v0, 0
617; GFX6-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
618; GFX6-NEXT:    s_waitcnt vmcnt(0)
619; GFX6-NEXT:    ; return to shader part epilog
620;
621; GFX8PLUS-LABEL: buffer_load_xy:
622; GFX8PLUS:       ; %bb.0: ; %main_body
623; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
624; GFX8PLUS-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
625; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
626; GFX8PLUS-NEXT:    ; return to shader part epilog
627;
628; GFX11-LABEL: buffer_load_xy:
629; GFX11:       ; %bb.0: ; %main_body
630; GFX11-NEXT:    v_mov_b32_e32 v0, 0
631; GFX11-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
632; GFX11-NEXT:    s_waitcnt vmcnt(0)
633; GFX11-NEXT:    ; return to shader part epilog
634;
635; NOPRT-LABEL: buffer_load_xy:
636; NOPRT:       ; %bb.0: ; %main_body
637; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
638; NOPRT-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
639; NOPRT-NEXT:    s_waitcnt vmcnt(0)
640; NOPRT-NEXT:    ; return to shader part epilog
641main_body:
  ; <2 x float> result type selects the two-component form of the load
642  %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
643  ret <2 x float> %data
644}
645
; Struct-returning overload of the intrinsic: the result is
; { <4 x i32>, i32 }. The four data dwords are stored to %out and the extra
; i32 (named %status here) is returned bitcast to float.
; The expected load carries the "tfe" modifier and writes five registers
; v[2:6]; v6 is what gets returned.
; The first three run configurations initialise all of v2..v6 before the
; load. The run that passes -mattr=-enable-prt-strict-null (NOPRT checks)
; zeroes only v6, which also serves as the index register.
; NOTE(review): presumably the pre-initialisation exists because the hardware
; may leave some destination registers unwritten with tfe; confirm against
; the enable-prt-strict-null feature description.
646define amdgpu_cs float @buffer_load_v4i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
647; GFX6-LABEL: buffer_load_v4i32_tfe:
648; GFX6:       ; %bb.0:
649; GFX6-NEXT:    v_mov_b32_e32 v2, 0
650; GFX6-NEXT:    v_mov_b32_e32 v3, v2
651; GFX6-NEXT:    v_mov_b32_e32 v4, v2
652; GFX6-NEXT:    v_mov_b32_e32 v5, v2
653; GFX6-NEXT:    v_mov_b32_e32 v6, v2
654; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
655; GFX6-NEXT:    s_mov_b32 s2, 0
656; GFX6-NEXT:    s_mov_b32 s3, 0xf000
657; GFX6-NEXT:    s_mov_b32 s0, s2
658; GFX6-NEXT:    s_mov_b32 s1, s2
659; GFX6-NEXT:    s_waitcnt vmcnt(0)
660; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
661; GFX6-NEXT:    v_mov_b32_e32 v0, v6
662; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
663; GFX6-NEXT:    ; return to shader part epilog
664;
665; GFX8PLUS-LABEL: buffer_load_v4i32_tfe:
666; GFX8PLUS:       ; %bb.0:
667; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
668; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
669; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
670; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
671; GFX8PLUS-NEXT:    v_mov_b32_e32 v6, v2
672; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
673; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
674; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
675; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
676; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
677; GFX8PLUS-NEXT:    ; return to shader part epilog
678;
679; GFX11-LABEL: buffer_load_v4i32_tfe:
680; GFX11:       ; %bb.0:
681; GFX11-NEXT:    v_mov_b32_e32 v2, 0
682; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
683; GFX11-NEXT:    v_mov_b32_e32 v3, v2
684; GFX11-NEXT:    v_mov_b32_e32 v4, v2
685; GFX11-NEXT:    v_mov_b32_e32 v5, v2
686; GFX11-NEXT:    v_mov_b32_e32 v6, v2
687; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
688; GFX11-NEXT:    s_waitcnt vmcnt(0)
689; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
690; GFX11-NEXT:    v_mov_b32_e32 v0, v6
691; GFX11-NEXT:    ; return to shader part epilog
692;
693; NOPRT-LABEL: buffer_load_v4i32_tfe:
694; NOPRT:       ; %bb.0:
695; NOPRT-NEXT:    v_mov_b32_e32 v6, 0
696; NOPRT-NEXT:    buffer_load_format_xyzw v[2:6], v6, s[0:3], 0 idxen tfe
697; NOPRT-NEXT:    s_waitcnt vmcnt(0)
698; NOPRT-NEXT:    global_store_b128 v[0:1], v[2:5], off
699; NOPRT-NEXT:    v_mov_b32_e32 v0, v6
700; NOPRT-NEXT:    ; return to shader part epilog
  ; No explicit entry label here, which is why the expected %bb.0 lines above
  ; carry no block-name suffix.
701  %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ; Member 0 is the loaded data, written out to memory so it stays live.
702  %data = extractvalue { <4 x i32>, i32 } %load, 0
703  store <4 x i32> %data, ptr addrspace(1) %out
  ; Member 1 is the extra dword produced by the tfe form; returned as float.
704  %status = extractvalue { <4 x i32>, i32 } %load, 1
705  %fstatus = bitcast i32 %status to float
706  ret float %fstatus
707}
708
; Same shape as the <4 x i32> struct-return test, but the data member is
; <4 x float>. The expectations are instruction-for-instruction the same as
; for the integer variant: a five-register tfe load into v[2:6], a 128-bit
; store of v[2:5] to %out, and the trailing i32 member returned from v6.
709define amdgpu_cs float @buffer_load_v4f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
710; GFX6-LABEL: buffer_load_v4f32_tfe:
711; GFX6:       ; %bb.0:
712; GFX6-NEXT:    v_mov_b32_e32 v2, 0
713; GFX6-NEXT:    v_mov_b32_e32 v3, v2
714; GFX6-NEXT:    v_mov_b32_e32 v4, v2
715; GFX6-NEXT:    v_mov_b32_e32 v5, v2
716; GFX6-NEXT:    v_mov_b32_e32 v6, v2
717; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
718; GFX6-NEXT:    s_mov_b32 s2, 0
719; GFX6-NEXT:    s_mov_b32 s3, 0xf000
720; GFX6-NEXT:    s_mov_b32 s0, s2
721; GFX6-NEXT:    s_mov_b32 s1, s2
722; GFX6-NEXT:    s_waitcnt vmcnt(0)
723; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
724; GFX6-NEXT:    v_mov_b32_e32 v0, v6
725; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
726; GFX6-NEXT:    ; return to shader part epilog
727;
728; GFX8PLUS-LABEL: buffer_load_v4f32_tfe:
729; GFX8PLUS:       ; %bb.0:
730; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
731; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
732; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
733; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
734; GFX8PLUS-NEXT:    v_mov_b32_e32 v6, v2
735; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
736; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
737; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
738; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
739; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
740; GFX8PLUS-NEXT:    ; return to shader part epilog
741;
742; GFX11-LABEL: buffer_load_v4f32_tfe:
743; GFX11:       ; %bb.0:
744; GFX11-NEXT:    v_mov_b32_e32 v2, 0
745; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
746; GFX11-NEXT:    v_mov_b32_e32 v3, v2
747; GFX11-NEXT:    v_mov_b32_e32 v4, v2
748; GFX11-NEXT:    v_mov_b32_e32 v5, v2
749; GFX11-NEXT:    v_mov_b32_e32 v6, v2
750; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
751; GFX11-NEXT:    s_waitcnt vmcnt(0)
752; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
753; GFX11-NEXT:    v_mov_b32_e32 v0, v6
754; GFX11-NEXT:    ; return to shader part epilog
755;
756; NOPRT-LABEL: buffer_load_v4f32_tfe:
757; NOPRT:       ; %bb.0:
758; NOPRT-NEXT:    v_mov_b32_e32 v6, 0
759; NOPRT-NEXT:    buffer_load_format_xyzw v[2:6], v6, s[0:3], 0 idxen tfe
760; NOPRT-NEXT:    s_waitcnt vmcnt(0)
761; NOPRT-NEXT:    global_store_b128 v[0:1], v[2:5], off
762; NOPRT-NEXT:    v_mov_b32_e32 v0, v6
763; NOPRT-NEXT:    ; return to shader part epilog
764  %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
765  %data = extractvalue { <4 x float>, i32 } %load, 0
766  store <4 x float> %data, ptr addrspace(1) %out
767  %status = extractvalue { <4 x float>, i32 } %load, 1
768  %fstatus = bitcast i32 %status to float
769  ret float %fstatus
770}
771
; Struct-returning buffer.load.format with a <3 x i32> data member: the data
; member is stored to %out and the trailing i32 member is returned bitcast to
; float. Every expected load is format_xyz with the tfe modifier into v[2:5],
; and the returned value is copied from v5. The verde expectation writes the
; three data dwords with two stores (a dword at offset 8 plus a dwordx2),
; whereas the other runs use a single three-dword store.
772define amdgpu_cs float @buffer_load_v3i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
773; GFX6-LABEL: buffer_load_v3i32_tfe:
774; GFX6:       ; %bb.0:
775; GFX6-NEXT:    v_mov_b32_e32 v2, 0
776; GFX6-NEXT:    v_mov_b32_e32 v3, v2
777; GFX6-NEXT:    v_mov_b32_e32 v4, v2
778; GFX6-NEXT:    v_mov_b32_e32 v5, v2
779; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
780; GFX6-NEXT:    s_mov_b32 s2, 0
781; GFX6-NEXT:    s_mov_b32 s3, 0xf000
782; GFX6-NEXT:    s_mov_b32 s0, s2
783; GFX6-NEXT:    s_mov_b32 s1, s2
784; GFX6-NEXT:    s_waitcnt vmcnt(0)
785; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
786; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
787; GFX6-NEXT:    v_mov_b32_e32 v0, v5
788; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
789; GFX6-NEXT:    ; return to shader part epilog
790;
791; GFX8PLUS-LABEL: buffer_load_v3i32_tfe:
792; GFX8PLUS:       ; %bb.0:
793; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
794; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
795; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
796; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
797; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
798; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
799; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
800; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
801; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
802; GFX8PLUS-NEXT:    ; return to shader part epilog
803;
804; GFX11-LABEL: buffer_load_v3i32_tfe:
805; GFX11:       ; %bb.0:
806; GFX11-NEXT:    v_mov_b32_e32 v2, 0
807; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
808; GFX11-NEXT:    v_mov_b32_e32 v3, v2
809; GFX11-NEXT:    v_mov_b32_e32 v4, v2
810; GFX11-NEXT:    v_mov_b32_e32 v5, v2
811; GFX11-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
812; GFX11-NEXT:    s_waitcnt vmcnt(0)
813; GFX11-NEXT:    global_store_b96 v[0:1], v[2:4], off
814; GFX11-NEXT:    v_mov_b32_e32 v0, v5
815; GFX11-NEXT:    ; return to shader part epilog
816;
817; NOPRT-LABEL: buffer_load_v3i32_tfe:
818; NOPRT:       ; %bb.0:
819; NOPRT-NEXT:    v_mov_b32_e32 v5, 0
820; NOPRT-NEXT:    buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe
821; NOPRT-NEXT:    s_waitcnt vmcnt(0)
822; NOPRT-NEXT:    global_store_b96 v[0:1], v[2:4], off
823; NOPRT-NEXT:    v_mov_b32_e32 v0, v5
824; NOPRT-NEXT:    ; return to shader part epilog
825  %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
826  %data = extractvalue { <3 x i32>, i32 } %load, 0
827  store <3 x i32> %data, ptr addrspace(1) %out
828  %status = extractvalue { <3 x i32>, i32 } %load, 1
829  %fstatus = bitcast i32 %status to float
830  ret float %fstatus
831}
832
; Same shape as the <3 x i32> struct-return test, but the data member is
; <3 x float>. The expectations match the integer variant: a format_xyz tfe
; load into v[2:5], the three data dwords stored to %out (split into two
; stores in the verde expectation), and the trailing i32 member returned
; from v5.
833define amdgpu_cs float @buffer_load_v3f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
834; GFX6-LABEL: buffer_load_v3f32_tfe:
835; GFX6:       ; %bb.0:
836; GFX6-NEXT:    v_mov_b32_e32 v2, 0
837; GFX6-NEXT:    v_mov_b32_e32 v3, v2
838; GFX6-NEXT:    v_mov_b32_e32 v4, v2
839; GFX6-NEXT:    v_mov_b32_e32 v5, v2
840; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
841; GFX6-NEXT:    s_mov_b32 s2, 0
842; GFX6-NEXT:    s_mov_b32 s3, 0xf000
843; GFX6-NEXT:    s_mov_b32 s0, s2
844; GFX6-NEXT:    s_mov_b32 s1, s2
845; GFX6-NEXT:    s_waitcnt vmcnt(0)
846; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
847; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
848; GFX6-NEXT:    v_mov_b32_e32 v0, v5
849; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
850; GFX6-NEXT:    ; return to shader part epilog
851;
852; GFX8PLUS-LABEL: buffer_load_v3f32_tfe:
853; GFX8PLUS:       ; %bb.0:
854; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
855; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
856; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
857; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
858; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
859; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
860; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
861; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
862; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
863; GFX8PLUS-NEXT:    ; return to shader part epilog
864;
865; GFX11-LABEL: buffer_load_v3f32_tfe:
866; GFX11:       ; %bb.0:
867; GFX11-NEXT:    v_mov_b32_e32 v2, 0
868; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
869; GFX11-NEXT:    v_mov_b32_e32 v3, v2
870; GFX11-NEXT:    v_mov_b32_e32 v4, v2
871; GFX11-NEXT:    v_mov_b32_e32 v5, v2
872; GFX11-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
873; GFX11-NEXT:    s_waitcnt vmcnt(0)
874; GFX11-NEXT:    global_store_b96 v[0:1], v[2:4], off
875; GFX11-NEXT:    v_mov_b32_e32 v0, v5
876; GFX11-NEXT:    ; return to shader part epilog
877;
878; NOPRT-LABEL: buffer_load_v3f32_tfe:
879; NOPRT:       ; %bb.0:
880; NOPRT-NEXT:    v_mov_b32_e32 v5, 0
881; NOPRT-NEXT:    buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe
882; NOPRT-NEXT:    s_waitcnt vmcnt(0)
883; NOPRT-NEXT:    global_store_b96 v[0:1], v[2:4], off
884; NOPRT-NEXT:    v_mov_b32_e32 v0, v5
885; NOPRT-NEXT:    ; return to shader part epilog
886  %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
887  %data = extractvalue { <3 x float>, i32 } %load, 0
888  store <3 x float> %data, ptr addrspace(1) %out
889  %status = extractvalue { <3 x float>, i32 } %load, 1
890  %fstatus = bitcast i32 %status to float
891  ret float %fstatus
892}
893
; Struct-returning buffer.load.format with a <2 x i32> data member: the data
; member is stored to %out and the trailing i32 member is returned bitcast to
; float. The tonga and both gfx1100 expectations use a format_xy tfe load
; into v[2:4] and return v4.
; NOTE(review): the verde expectation instead loads format_xyz into v[2:5]
; (four registers zeroed beforehand) yet still returns v4, whereas every
; other expectation in this function returns the last register of the load's
; destination range. Presumably an artifact of how the three-dword result is
; widened on that target -- confirm the returned register is the intended one
; before relying on or regenerating these checks.
894define amdgpu_cs float @buffer_load_v2i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
895; GFX6-LABEL: buffer_load_v2i32_tfe:
896; GFX6:       ; %bb.0:
897; GFX6-NEXT:    v_mov_b32_e32 v2, 0
898; GFX6-NEXT:    v_mov_b32_e32 v3, v2
899; GFX6-NEXT:    v_mov_b32_e32 v4, v2
900; GFX6-NEXT:    v_mov_b32_e32 v5, v2
901; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
902; GFX6-NEXT:    s_mov_b32 s2, 0
903; GFX6-NEXT:    s_mov_b32 s3, 0xf000
904; GFX6-NEXT:    s_mov_b32 s0, s2
905; GFX6-NEXT:    s_mov_b32 s1, s2
906; GFX6-NEXT:    s_waitcnt vmcnt(0)
907; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
908; GFX6-NEXT:    v_mov_b32_e32 v0, v4
909; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
910; GFX6-NEXT:    ; return to shader part epilog
911;
912; GFX8PLUS-LABEL: buffer_load_v2i32_tfe:
913; GFX8PLUS:       ; %bb.0:
914; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
915; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
916; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
917; GFX8PLUS-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
918; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
919; GFX8PLUS-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
920; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v4
921; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
922; GFX8PLUS-NEXT:    ; return to shader part epilog
923;
924; GFX11-LABEL: buffer_load_v2i32_tfe:
925; GFX11:       ; %bb.0:
926; GFX11-NEXT:    v_mov_b32_e32 v2, 0
927; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
928; GFX11-NEXT:    v_mov_b32_e32 v3, v2
929; GFX11-NEXT:    v_mov_b32_e32 v4, v2
930; GFX11-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
931; GFX11-NEXT:    s_waitcnt vmcnt(0)
932; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
933; GFX11-NEXT:    v_mov_b32_e32 v0, v4
934; GFX11-NEXT:    ; return to shader part epilog
935;
936; NOPRT-LABEL: buffer_load_v2i32_tfe:
937; NOPRT:       ; %bb.0:
938; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
939; NOPRT-NEXT:    buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe
940; NOPRT-NEXT:    s_waitcnt vmcnt(0)
941; NOPRT-NEXT:    global_store_b64 v[0:1], v[2:3], off
942; NOPRT-NEXT:    v_mov_b32_e32 v0, v4
943; NOPRT-NEXT:    ; return to shader part epilog
944  %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
945  %data = extractvalue { <2 x i32>, i32 } %load, 0
946  store <2 x i32> %data, ptr addrspace(1) %out
947  %status = extractvalue { <2 x i32>, i32 } %load, 1
948  %fstatus = bitcast i32 %status to float
949  ret float %fstatus
950}
951
; Same shape as the <2 x i32> struct-return test, but the data member is
; <2 x float>. The tonga and both gfx1100 expectations use a format_xy tfe
; load into v[2:4] and return v4.
; NOTE(review): as in the integer variant, the verde expectation loads
; format_xyz into v[2:5] but returns v4 rather than the last register of the
; destination range -- confirm this is the intended register before relying
; on or regenerating these checks.
952define amdgpu_cs float @buffer_load_v2f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
953; GFX6-LABEL: buffer_load_v2f32_tfe:
954; GFX6:       ; %bb.0:
955; GFX6-NEXT:    v_mov_b32_e32 v2, 0
956; GFX6-NEXT:    v_mov_b32_e32 v3, v2
957; GFX6-NEXT:    v_mov_b32_e32 v4, v2
958; GFX6-NEXT:    v_mov_b32_e32 v5, v2
959; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
960; GFX6-NEXT:    s_mov_b32 s2, 0
961; GFX6-NEXT:    s_mov_b32 s3, 0xf000
962; GFX6-NEXT:    s_mov_b32 s0, s2
963; GFX6-NEXT:    s_mov_b32 s1, s2
964; GFX6-NEXT:    s_waitcnt vmcnt(0)
965; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
966; GFX6-NEXT:    v_mov_b32_e32 v0, v4
967; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
968; GFX6-NEXT:    ; return to shader part epilog
969;
970; GFX8PLUS-LABEL: buffer_load_v2f32_tfe:
971; GFX8PLUS:       ; %bb.0:
972; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
973; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
974; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
975; GFX8PLUS-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
976; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
977; GFX8PLUS-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
978; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v4
979; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
980; GFX8PLUS-NEXT:    ; return to shader part epilog
981;
982; GFX11-LABEL: buffer_load_v2f32_tfe:
983; GFX11:       ; %bb.0:
984; GFX11-NEXT:    v_mov_b32_e32 v2, 0
985; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
986; GFX11-NEXT:    v_mov_b32_e32 v3, v2
987; GFX11-NEXT:    v_mov_b32_e32 v4, v2
988; GFX11-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
989; GFX11-NEXT:    s_waitcnt vmcnt(0)
990; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
991; GFX11-NEXT:    v_mov_b32_e32 v0, v4
992; GFX11-NEXT:    ; return to shader part epilog
993;
994; NOPRT-LABEL: buffer_load_v2f32_tfe:
995; NOPRT:       ; %bb.0:
996; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
997; NOPRT-NEXT:    buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe
998; NOPRT-NEXT:    s_waitcnt vmcnt(0)
999; NOPRT-NEXT:    global_store_b64 v[0:1], v[2:3], off
1000; NOPRT-NEXT:    v_mov_b32_e32 v0, v4
1001; NOPRT-NEXT:    ; return to shader part epilog
1002  %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
1003  %data = extractvalue { <2 x float>, i32 } %load, 0
1004  store <2 x float> %data, ptr addrspace(1) %out
1005  %status = extractvalue { <2 x float>, i32 } %load, 1
1006  %fstatus = bitcast i32 %status to float
1007  ret float %fstatus
1008}
1009
; Struct-returning buffer.load.format with a scalar i32 data member: the data
; member is stored to %out and the trailing i32 member is returned bitcast to
; float. Every expected load is format_x with the tfe modifier into v[2:3];
; v2 is stored and v3 is returned. The gfx1100 run with
; enable-prt-strict-null disabled zeroes only v3 before the load, while the
; other runs zero both destination registers.
1010define amdgpu_cs float @buffer_load_i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
1011; GFX6-LABEL: buffer_load_i32_tfe:
1012; GFX6:       ; %bb.0:
1013; GFX6-NEXT:    v_mov_b32_e32 v2, 0
1014; GFX6-NEXT:    v_mov_b32_e32 v3, v2
1015; GFX6-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1016; GFX6-NEXT:    s_mov_b32 s2, 0
1017; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1018; GFX6-NEXT:    s_mov_b32 s0, s2
1019; GFX6-NEXT:    s_mov_b32 s1, s2
1020; GFX6-NEXT:    s_waitcnt vmcnt(0)
1021; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1022; GFX6-NEXT:    v_mov_b32_e32 v0, v3
1023; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1024; GFX6-NEXT:    ; return to shader part epilog
1025;
1026; GFX8PLUS-LABEL: buffer_load_i32_tfe:
1027; GFX8PLUS:       ; %bb.0:
1028; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
1029; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
1030; GFX8PLUS-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1031; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
1032; GFX8PLUS-NEXT:    flat_store_dword v[0:1], v2
1033; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v3
1034; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
1035; GFX8PLUS-NEXT:    ; return to shader part epilog
1036;
1037; GFX11-LABEL: buffer_load_i32_tfe:
1038; GFX11:       ; %bb.0:
1039; GFX11-NEXT:    v_mov_b32_e32 v2, 0
1040; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1041; GFX11-NEXT:    v_mov_b32_e32 v3, v2
1042; GFX11-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1043; GFX11-NEXT:    s_waitcnt vmcnt(0)
1044; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
1045; GFX11-NEXT:    v_mov_b32_e32 v0, v3
1046; GFX11-NEXT:    ; return to shader part epilog
1047;
1048; NOPRT-LABEL: buffer_load_i32_tfe:
1049; NOPRT:       ; %bb.0:
1050; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
1051; NOPRT-NEXT:    buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe
1052; NOPRT-NEXT:    s_waitcnt vmcnt(0)
1053; NOPRT-NEXT:    global_store_b32 v[0:1], v2, off
1054; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
1055; NOPRT-NEXT:    ; return to shader part epilog
1056  %load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
1057  %data = extractvalue { i32, i32 } %load, 0
1058  store i32 %data, ptr addrspace(1) %out
1059  %status = extractvalue { i32, i32 } %load, 1
1060  %fstatus = bitcast i32 %status to float
1061  ret float %fstatus
1062}
1063
; Same shape as the scalar i32 struct-return test, but the data member is a
; float. The expectations match the integer variant: a format_x tfe load into
; v[2:3], v2 stored to %out, and the trailing i32 member returned from v3.
1064define amdgpu_cs float @buffer_load_f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
1065; GFX6-LABEL: buffer_load_f32_tfe:
1066; GFX6:       ; %bb.0:
1067; GFX6-NEXT:    v_mov_b32_e32 v2, 0
1068; GFX6-NEXT:    v_mov_b32_e32 v3, v2
1069; GFX6-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1070; GFX6-NEXT:    s_mov_b32 s2, 0
1071; GFX6-NEXT:    s_mov_b32 s3, 0xf000
1072; GFX6-NEXT:    s_mov_b32 s0, s2
1073; GFX6-NEXT:    s_mov_b32 s1, s2
1074; GFX6-NEXT:    s_waitcnt vmcnt(0)
1075; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
1076; GFX6-NEXT:    v_mov_b32_e32 v0, v3
1077; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1078; GFX6-NEXT:    ; return to shader part epilog
1079;
1080; GFX8PLUS-LABEL: buffer_load_f32_tfe:
1081; GFX8PLUS:       ; %bb.0:
1082; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
1083; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
1084; GFX8PLUS-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1085; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
1086; GFX8PLUS-NEXT:    flat_store_dword v[0:1], v2
1087; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v3
1088; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
1089; GFX8PLUS-NEXT:    ; return to shader part epilog
1090;
1091; GFX11-LABEL: buffer_load_f32_tfe:
1092; GFX11:       ; %bb.0:
1093; GFX11-NEXT:    v_mov_b32_e32 v2, 0
1094; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1095; GFX11-NEXT:    v_mov_b32_e32 v3, v2
1096; GFX11-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
1097; GFX11-NEXT:    s_waitcnt vmcnt(0)
1098; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
1099; GFX11-NEXT:    v_mov_b32_e32 v0, v3
1100; GFX11-NEXT:    ; return to shader part epilog
1101;
1102; NOPRT-LABEL: buffer_load_f32_tfe:
1103; NOPRT:       ; %bb.0:
1104; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
1105; NOPRT-NEXT:    buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe
1106; NOPRT-NEXT:    s_waitcnt vmcnt(0)
1107; NOPRT-NEXT:    global_store_b32 v[0:1], v2, off
1108; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
1109; NOPRT-NEXT:    ; return to shader part epilog
1110  %load = call { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
1111  %data = extractvalue { float, i32 } %load, 0
1112  store float %data, ptr addrspace(1) %out
1113  %status = extractvalue { float, i32 } %load, 1
1114  %fstatus = bitcast i32 %status to float
1115  ret float %fstatus
1116}
1117
; Declarations of the struct.ptr.buffer.load.format intrinsic overloads. The
; first four return plain data (float, <2 x float>, <4 x float>, i32); the
; sl_* overloads return a { data, i32 } struct and are the only ones declared
; here with immarg on the final i32 operand. All of them share attribute
; group #0 (nounwind readonly).
1118declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #0
1119declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
1120declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
1121declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) #0
1122declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1123declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1124declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1125declare { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1126declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1127declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1128declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1129declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
1130attributes #0 = { nounwind readonly }
1131