xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll (revision 75e528fdd9594ecb6fdb5d9e7bee1506f7e43be0)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GFX6 %s
3;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GFX8PLUS %s
4;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=GFX11 %s
5;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-enable-prt-strict-null -verify-machineinstrs | FileCheck --check-prefixes=NOPRT %s
6;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck --check-prefixes=GFX12,GFX12-SDAG %s
7;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck --check-prefixes=GFX12,GFX12-GISEL %s
8
; Issues three xyzw format loads through the same inreg resource, differing
; only in the last intrinsic argument (0, 1 and 2), and returns all three
; results in one struct. In the expected output below, the load with 1 carries
; "glc" and the load with 2 carries "slc" on the verde, tonga and gfx1100 runs;
; on the gfx1200 runs they carry th:TH_LOAD_NT and th:TH_LOAD_HT instead.
9define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
10; GFX6-LABEL: buffer_load:
11; GFX6:       ; %bb.0: ; %main_body
12; GFX6-NEXT:    v_mov_b32_e32 v8, 0
13; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
14; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
15; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
16; GFX6-NEXT:    s_waitcnt vmcnt(0)
17; GFX6-NEXT:    ; return to shader part epilog
18;
19; GFX8PLUS-LABEL: buffer_load:
20; GFX8PLUS:       ; %bb.0: ; %main_body
21; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
22; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
23; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
24; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
25; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
26; GFX8PLUS-NEXT:    ; return to shader part epilog
27;
28; GFX11-LABEL: buffer_load:
29; GFX11:       ; %bb.0: ; %main_body
30; GFX11-NEXT:    v_mov_b32_e32 v8, 0
31; GFX11-NEXT:    s_clause 0x2
32; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
33; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
34; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
35; GFX11-NEXT:    s_waitcnt vmcnt(0)
36; GFX11-NEXT:    ; return to shader part epilog
37;
38; NOPRT-LABEL: buffer_load:
39; NOPRT:       ; %bb.0: ; %main_body
40; NOPRT-NEXT:    v_mov_b32_e32 v8, 0
41; NOPRT-NEXT:    s_clause 0x2
42; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
43; NOPRT-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
44; NOPRT-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
45; NOPRT-NEXT:    s_waitcnt vmcnt(0)
46; NOPRT-NEXT:    ; return to shader part epilog
47;
48; GFX12-LABEL: buffer_load:
49; GFX12:       ; %bb.0: ; %main_body
50; GFX12-NEXT:    v_mov_b32_e32 v8, 0
51; GFX12-NEXT:    s_clause 0x2
52; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], null idxen
53; GFX12-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], null idxen th:TH_LOAD_NT
54; GFX12-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], null idxen th:TH_LOAD_HT
55; GFX12-NEXT:    s_wait_loadcnt 0x0
56; GFX12-NEXT:    ; return to shader part epilog
57main_body:
58  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0)
59  %data_glc = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 1)
60  %data_slc = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 2)
; The aggregate is seeded with poison instead of the deprecated undef; fields
; 0, 1 and 2 are all overwritten below, so no placeholder element is returned.
61  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %data, 0
62  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
63  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
64  ret {<4 x float>, <4 x float>, <4 x float>} %r2
65}
66
; Single xyzw format load whose third intrinsic argument is the constant 42.
; Every run line expects that constant to appear as the instruction's
; immediate "offset:42", with a zero index register and no "offen" operand.
67define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
68; GFX6-LABEL: buffer_load_immoffs:
69; GFX6:       ; %bb.0: ; %main_body
70; GFX6-NEXT:    v_mov_b32_e32 v0, 0
71; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
72; GFX6-NEXT:    s_waitcnt vmcnt(0)
73; GFX6-NEXT:    ; return to shader part epilog
74;
75; GFX8PLUS-LABEL: buffer_load_immoffs:
76; GFX8PLUS:       ; %bb.0: ; %main_body
77; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
78; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
79; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
80; GFX8PLUS-NEXT:    ; return to shader part epilog
81;
82; GFX11-LABEL: buffer_load_immoffs:
83; GFX11:       ; %bb.0: ; %main_body
84; GFX11-NEXT:    v_mov_b32_e32 v0, 0
85; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
86; GFX11-NEXT:    s_waitcnt vmcnt(0)
87; GFX11-NEXT:    ; return to shader part epilog
88;
89; NOPRT-LABEL: buffer_load_immoffs:
90; NOPRT:       ; %bb.0: ; %main_body
91; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
92; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
93; NOPRT-NEXT:    s_waitcnt vmcnt(0)
94; NOPRT-NEXT:    ; return to shader part epilog
95;
96; GFX12-LABEL: buffer_load_immoffs:
97; GFX12:       ; %bb.0: ; %main_body
98; GFX12-NEXT:    v_mov_b32_e32 v0, 0
99; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:42
100; GFX12-NEXT:    s_wait_loadcnt 0x0
101; GFX12-NEXT:    ; return to shader part epilog
102main_body:
103  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 42, i32 0, i32 0)
104  ret <4 x float> %data
105}
106
; Three xyzw format loads whose (third, fourth) intrinsic arguments are
; (4092, 60), (4092, 32764) and (4, 36860); the three results are summed with
; fadd and returned. The expected output shows how the fourth argument is
; materialized: 60 is used directly as the scalar operand on the verde, tonga
; and gfx1100 runs but is first moved into s4 on the gfx1200 runs, while
; 32764 (0x7ffc) and 36860 (0x8ffc) are always moved into an SGPR. The third
; argument stays in the immediate offset field (offset:4092 / offset:4).
107define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
108; GFX6-LABEL: buffer_load_immoffs_large:
109; GFX6:       ; %bb.0: ; %main_body
110; GFX6-NEXT:    v_mov_b32_e32 v8, 0
111; GFX6-NEXT:    s_movk_i32 s4, 0x7ffc
112; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
113; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
114; GFX6-NEXT:    s_mov_b32 s4, 0x8ffc
115; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
116; GFX6-NEXT:    s_waitcnt vmcnt(1)
117; GFX6-NEXT:    v_add_f32_e32 v3, v3, v7
118; GFX6-NEXT:    v_add_f32_e32 v2, v2, v6
119; GFX6-NEXT:    v_add_f32_e32 v1, v1, v5
120; GFX6-NEXT:    v_add_f32_e32 v0, v0, v4
121; GFX6-NEXT:    s_waitcnt vmcnt(0)
122; GFX6-NEXT:    v_add_f32_e32 v0, v8, v0
123; GFX6-NEXT:    v_add_f32_e32 v1, v9, v1
124; GFX6-NEXT:    v_add_f32_e32 v2, v10, v2
125; GFX6-NEXT:    v_add_f32_e32 v3, v11, v3
126; GFX6-NEXT:    ; return to shader part epilog
127;
128; GFX8PLUS-LABEL: buffer_load_immoffs_large:
129; GFX8PLUS:       ; %bb.0: ; %main_body
130; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
131; GFX8PLUS-NEXT:    s_movk_i32 s4, 0x7ffc
132; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
133; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
134; GFX8PLUS-NEXT:    s_mov_b32 s4, 0x8ffc
135; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
136; GFX8PLUS-NEXT:    s_waitcnt vmcnt(1)
137; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v3, v7
138; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v2, v6
139; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v1, v5
140; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v0, v4
141; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
142; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v8, v0
143; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v9, v1
144; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v10, v2
145; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v11, v3
146; GFX8PLUS-NEXT:    ; return to shader part epilog
147;
148; GFX11-LABEL: buffer_load_immoffs_large:
149; GFX11:       ; %bb.0: ; %main_body
150; GFX11-NEXT:    v_mov_b32_e32 v8, 0
151; GFX11-NEXT:    s_movk_i32 s4, 0x7ffc
152; GFX11-NEXT:    s_clause 0x1
153; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
154; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
155; GFX11-NEXT:    s_mov_b32 s4, 0x8ffc
156; GFX11-NEXT:    s_waitcnt vmcnt(0)
157; GFX11-NEXT:    v_add_f32_e32 v1, v1, v5
158; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
159; GFX11-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
160; GFX11-NEXT:    s_waitcnt vmcnt(0)
161; GFX11-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
162; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
163; GFX11-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
164; GFX11-NEXT:    v_add_f32_e32 v2, v10, v2
165; GFX11-NEXT:    ; return to shader part epilog
166;
167; NOPRT-LABEL: buffer_load_immoffs_large:
168; NOPRT:       ; %bb.0: ; %main_body
169; NOPRT-NEXT:    v_mov_b32_e32 v8, 0
170; NOPRT-NEXT:    s_movk_i32 s4, 0x7ffc
171; NOPRT-NEXT:    s_clause 0x1
172; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
173; NOPRT-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
174; NOPRT-NEXT:    s_mov_b32 s4, 0x8ffc
175; NOPRT-NEXT:    s_waitcnt vmcnt(0)
176; NOPRT-NEXT:    v_add_f32_e32 v1, v1, v5
177; NOPRT-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
178; NOPRT-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
179; NOPRT-NEXT:    s_waitcnt vmcnt(0)
180; NOPRT-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
181; NOPRT-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
182; NOPRT-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
183; NOPRT-NEXT:    v_add_f32_e32 v2, v10, v2
184; NOPRT-NEXT:    ; return to shader part epilog
185;
186; GFX12-LABEL: buffer_load_immoffs_large:
187; GFX12:       ; %bb.0: ; %main_body
188; GFX12-NEXT:    v_mov_b32_e32 v8, 0
189; GFX12-NEXT:    s_mov_b32 s4, 60
190; GFX12-NEXT:    s_movk_i32 s5, 0x7ffc
191; GFX12-NEXT:    s_clause 0x1
192; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], s4 idxen offset:4092
193; GFX12-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s5 idxen offset:4092
194; GFX12-NEXT:    s_mov_b32 s4, 0x8ffc
195; GFX12-NEXT:    s_wait_loadcnt 0x0
196; GFX12-NEXT:    v_add_f32_e32 v1, v1, v5
197; GFX12-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
198; GFX12-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
199; GFX12-NEXT:    s_wait_loadcnt 0x0
200; GFX12-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
201; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
202; GFX12-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
203; GFX12-NEXT:    v_add_f32_e32 v2, v10, v2
204; GFX12-NEXT:    ; return to shader part epilog
205main_body:
206  %d.0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 60, i32 0)
207  %d.1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 32764, i32 0)
208  %d.2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4, i32 36860, i32 0)
209  %d.3 = fadd <4 x float> %d.0, %d.1
210  %data = fadd <4 x float> %d.2, %d.3
211  ret <4 x float> %data
212}
213
; Single xyzw format load with the constant 4092 as the third intrinsic
; argument. All run lines expect the whole value in the immediate field
; ("offset:4092") with no "offen" register operand.
214define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
215; GFX6-LABEL: buffer_load_voffset_large_12bit:
216; GFX6:       ; %bb.0: ; %main_body
217; GFX6-NEXT:    v_mov_b32_e32 v0, 0
218; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
219; GFX6-NEXT:    s_waitcnt vmcnt(0)
220; GFX6-NEXT:    ; return to shader part epilog
221;
222; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
223; GFX8PLUS:       ; %bb.0: ; %main_body
224; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
225; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
226; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
227; GFX8PLUS-NEXT:    ; return to shader part epilog
228;
229; GFX11-LABEL: buffer_load_voffset_large_12bit:
230; GFX11:       ; %bb.0: ; %main_body
231; GFX11-NEXT:    v_mov_b32_e32 v0, 0
232; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
233; GFX11-NEXT:    s_waitcnt vmcnt(0)
234; GFX11-NEXT:    ; return to shader part epilog
235;
236; NOPRT-LABEL: buffer_load_voffset_large_12bit:
237; NOPRT:       ; %bb.0: ; %main_body
238; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
239; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
240; NOPRT-NEXT:    s_waitcnt vmcnt(0)
241; NOPRT-NEXT:    ; return to shader part epilog
242;
243; GFX12-LABEL: buffer_load_voffset_large_12bit:
244; GFX12:       ; %bb.0: ; %main_body
245; GFX12-NEXT:    v_mov_b32_e32 v0, 0
246; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:4092
247; GFX12-NEXT:    s_wait_loadcnt 0x0
248; GFX12-NEXT:    ; return to shader part epilog
249main_body:
250  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 0)
251  ret <4 x float> %data
252}
253
; Single xyzw format load with the constant 8188 as the third intrinsic
; argument. The verde, tonga and gfx1100 runs expect the value split into
; 0x1000 in a VGPR (used via "offen") plus "offset:4092"; the gfx1200 runs
; expect it entirely in the immediate field ("offset:8188").
254define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(<4 x i32> inreg) {
255; GFX6-LABEL: buffer_load_voffset_large_13bit:
256; GFX6:       ; %bb.0: ; %main_body
257; GFX6-NEXT:    s_mov_b32 s4, 0
258; GFX6-NEXT:    v_mov_b32_e32 v1, 0x1000
259; GFX6-NEXT:    v_mov_b32_e32 v0, s4
260; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
261; GFX6-NEXT:    s_waitcnt vmcnt(0)
262; GFX6-NEXT:    ; return to shader part epilog
263;
264; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
265; GFX8PLUS:       ; %bb.0: ; %main_body
266; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
267; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x1000
268; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
269; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
270; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
271; GFX8PLUS-NEXT:    ; return to shader part epilog
272;
273; GFX11-LABEL: buffer_load_voffset_large_13bit:
274; GFX11:       ; %bb.0: ; %main_body
275; GFX11-NEXT:    s_mov_b32 s4, 0
276; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
277; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
278; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
279; GFX11-NEXT:    s_waitcnt vmcnt(0)
280; GFX11-NEXT:    ; return to shader part epilog
281;
282; NOPRT-LABEL: buffer_load_voffset_large_13bit:
283; NOPRT:       ; %bb.0: ; %main_body
284; NOPRT-NEXT:    s_mov_b32 s4, 0
285; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
286; NOPRT-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
287; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
288; NOPRT-NEXT:    s_waitcnt vmcnt(0)
289; NOPRT-NEXT:    ; return to shader part epilog
290;
291; GFX12-LABEL: buffer_load_voffset_large_13bit:
292; GFX12:       ; %bb.0: ; %main_body
293; GFX12-NEXT:    v_mov_b32_e32 v0, 0
294; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:8188
295; GFX12-NEXT:    s_wait_loadcnt 0x0
296; GFX12-NEXT:    ; return to shader part epilog
297main_body:
298  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 0)
299  ret <4 x float> %data
300}
301
; Single xyzw format load with the constant 65532 as the third intrinsic
; argument. The verde, tonga and gfx1100 runs expect the value split into
; 0xf000 in a VGPR (used via "offen") plus "offset:4092"; the gfx1200 runs
; expect it entirely in the immediate field ("offset:65532").
302define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(<4 x i32> inreg) {
303; GFX6-LABEL: buffer_load_voffset_large_16bit:
304; GFX6:       ; %bb.0: ; %main_body
305; GFX6-NEXT:    s_mov_b32 s4, 0
306; GFX6-NEXT:    v_mov_b32_e32 v1, 0xf000
307; GFX6-NEXT:    v_mov_b32_e32 v0, s4
308; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
309; GFX6-NEXT:    s_waitcnt vmcnt(0)
310; GFX6-NEXT:    ; return to shader part epilog
311;
312; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
313; GFX8PLUS:       ; %bb.0: ; %main_body
314; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
315; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xf000
316; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
317; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
318; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
319; GFX8PLUS-NEXT:    ; return to shader part epilog
320;
321; GFX11-LABEL: buffer_load_voffset_large_16bit:
322; GFX11:       ; %bb.0: ; %main_body
323; GFX11-NEXT:    s_mov_b32 s4, 0
324; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
325; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
326; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
327; GFX11-NEXT:    s_waitcnt vmcnt(0)
328; GFX11-NEXT:    ; return to shader part epilog
329;
330; NOPRT-LABEL: buffer_load_voffset_large_16bit:
331; NOPRT:       ; %bb.0: ; %main_body
332; NOPRT-NEXT:    s_mov_b32 s4, 0
333; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
334; NOPRT-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
335; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
336; NOPRT-NEXT:    s_waitcnt vmcnt(0)
337; NOPRT-NEXT:    ; return to shader part epilog
338;
339; GFX12-LABEL: buffer_load_voffset_large_16bit:
340; GFX12:       ; %bb.0: ; %main_body
341; GFX12-NEXT:    v_mov_b32_e32 v0, 0
342; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:65532
343; GFX12-NEXT:    s_wait_loadcnt 0x0
344; GFX12-NEXT:    ; return to shader part epilog
345main_body:
346  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 0)
347  ret <4 x float> %data
348}
349
; Single xyzw format load with the constant 8388604 as the third intrinsic
; argument. The verde, tonga and gfx1100 runs expect the value split into
; 0x7ff000 in a VGPR (used via "offen") plus "offset:4092"; the gfx1200 runs
; expect it entirely in the immediate field ("offset:8388604").
350define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(<4 x i32> inreg) {
351; GFX6-LABEL: buffer_load_voffset_large_23bit:
352; GFX6:       ; %bb.0: ; %main_body
353; GFX6-NEXT:    s_mov_b32 s4, 0
354; GFX6-NEXT:    v_mov_b32_e32 v1, 0x7ff000
355; GFX6-NEXT:    v_mov_b32_e32 v0, s4
356; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
357; GFX6-NEXT:    s_waitcnt vmcnt(0)
358; GFX6-NEXT:    ; return to shader part epilog
359;
360; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
361; GFX8PLUS:       ; %bb.0: ; %main_body
362; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
363; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x7ff000
364; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
365; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
366; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
367; GFX8PLUS-NEXT:    ; return to shader part epilog
368;
369; GFX11-LABEL: buffer_load_voffset_large_23bit:
370; GFX11:       ; %bb.0: ; %main_body
371; GFX11-NEXT:    s_mov_b32 s4, 0
372; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
373; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
374; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
375; GFX11-NEXT:    s_waitcnt vmcnt(0)
376; GFX11-NEXT:    ; return to shader part epilog
377;
378; NOPRT-LABEL: buffer_load_voffset_large_23bit:
379; NOPRT:       ; %bb.0: ; %main_body
380; NOPRT-NEXT:    s_mov_b32 s4, 0
381; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
382; NOPRT-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
383; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
384; NOPRT-NEXT:    s_waitcnt vmcnt(0)
385; NOPRT-NEXT:    ; return to shader part epilog
386;
387; GFX12-LABEL: buffer_load_voffset_large_23bit:
388; GFX12:       ; %bb.0: ; %main_body
389; GFX12-NEXT:    v_mov_b32_e32 v0, 0
390; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:8388604
391; GFX12-NEXT:    s_wait_loadcnt 0x0
392; GFX12-NEXT:    ; return to shader part epilog
393main_body:
394  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 0)
395  ret <4 x float> %data
396}
397
; Single xyzw format load with the constant 16777212 as the third intrinsic
; argument. No run line expects the whole value in the immediate field: the
; verde, tonga and gfx1100 runs expect 0xfff000 in a VGPR plus "offset:4092",
; and the gfx1200 runs expect 0x800000 in a VGPR plus "offset:8388604".
; This is the only function in this part of the file with separate SDAG and
; GISEL assertions for gfx1200; they differ only in the order of the two
; halves of the v_dual_mov_b32 instruction.
398define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(<4 x i32> inreg) {
399; GFX6-LABEL: buffer_load_voffset_large_24bit:
400; GFX6:       ; %bb.0: ; %main_body
401; GFX6-NEXT:    s_mov_b32 s4, 0
402; GFX6-NEXT:    v_mov_b32_e32 v1, 0xfff000
403; GFX6-NEXT:    v_mov_b32_e32 v0, s4
404; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
405; GFX6-NEXT:    s_waitcnt vmcnt(0)
406; GFX6-NEXT:    ; return to shader part epilog
407;
408; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
409; GFX8PLUS:       ; %bb.0: ; %main_body
410; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
411; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xfff000
412; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
413; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
414; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
415; GFX8PLUS-NEXT:    ; return to shader part epilog
416;
417; GFX11-LABEL: buffer_load_voffset_large_24bit:
418; GFX11:       ; %bb.0: ; %main_body
419; GFX11-NEXT:    s_mov_b32 s4, 0
420; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
421; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
422; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
423; GFX11-NEXT:    s_waitcnt vmcnt(0)
424; GFX11-NEXT:    ; return to shader part epilog
425;
426; NOPRT-LABEL: buffer_load_voffset_large_24bit:
427; NOPRT:       ; %bb.0: ; %main_body
428; NOPRT-NEXT:    s_mov_b32 s4, 0
429; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
430; NOPRT-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
431; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
432; NOPRT-NEXT:    s_waitcnt vmcnt(0)
433; NOPRT-NEXT:    ; return to shader part epilog
434;
435; GFX12-SDAG-LABEL: buffer_load_voffset_large_24bit:
436; GFX12-SDAG:       ; %bb.0: ; %main_body
437; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 0x800000 :: v_dual_mov_b32 v0, 0
438; GFX12-SDAG-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen offset:8388604
439; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
440; GFX12-SDAG-NEXT:    ; return to shader part epilog
441;
442; GFX12-GISEL-LABEL: buffer_load_voffset_large_24bit:
443; GFX12-GISEL:       ; %bb.0: ; %main_body
444; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x800000
445; GFX12-GISEL-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen offset:8388604
446; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
447; GFX12-GISEL-NEXT:    ; return to shader part epilog
448main_body:
449  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 0)
450  ret <4 x float> %data
451}
452
; Single xyzw format load whose second intrinsic argument is the function's
; i32 parameter. Every run line expects the incoming v0 to be used directly
; as the "idxen" register, with no extra move before the load.
453define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
454; GFX6-LABEL: buffer_load_idx:
455; GFX6:       ; %bb.0: ; %main_body
456; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
457; GFX6-NEXT:    s_waitcnt vmcnt(0)
458; GFX6-NEXT:    ; return to shader part epilog
459;
460; GFX8PLUS-LABEL: buffer_load_idx:
461; GFX8PLUS:       ; %bb.0: ; %main_body
462; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
463; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
464; GFX8PLUS-NEXT:    ; return to shader part epilog
465;
466; GFX11-LABEL: buffer_load_idx:
467; GFX11:       ; %bb.0: ; %main_body
468; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
469; GFX11-NEXT:    s_waitcnt vmcnt(0)
470; GFX11-NEXT:    ; return to shader part epilog
471;
472; NOPRT-LABEL: buffer_load_idx:
473; NOPRT:       ; %bb.0: ; %main_body
474; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
475; NOPRT-NEXT:    s_waitcnt vmcnt(0)
476; NOPRT-NEXT:    ; return to shader part epilog
477;
478; GFX12-LABEL: buffer_load_idx:
479; GFX12:       ; %bb.0: ; %main_body
480; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen
481; GFX12-NEXT:    s_wait_loadcnt 0x0
482; GFX12-NEXT:    ; return to shader part epilog
483main_body:
484  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0)
485  ret <4 x float> %data
486}
487
; Single xyzw format load whose third intrinsic argument is the function's
; i32 parameter and whose second argument is the constant 0. The expected
; output builds a v[0:1] pair (zero in v0, the parameter copied to v1) and
; issues the load with both "idxen" and "offen".
488define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) {
489; GFX6-LABEL: buffer_load_ofs:
490; GFX6:       ; %bb.0: ; %main_body
491; GFX6-NEXT:    s_mov_b32 s4, 0
492; GFX6-NEXT:    v_mov_b32_e32 v1, v0
493; GFX6-NEXT:    v_mov_b32_e32 v0, s4
494; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
495; GFX6-NEXT:    s_waitcnt vmcnt(0)
496; GFX6-NEXT:    ; return to shader part epilog
497;
498; GFX8PLUS-LABEL: buffer_load_ofs:
499; GFX8PLUS:       ; %bb.0: ; %main_body
500; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
501; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
502; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
503; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
504; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
505; GFX8PLUS-NEXT:    ; return to shader part epilog
506;
507; GFX11-LABEL: buffer_load_ofs:
508; GFX11:       ; %bb.0: ; %main_body
509; GFX11-NEXT:    s_mov_b32 s4, 0
510; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
511; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
512; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
513; GFX11-NEXT:    s_waitcnt vmcnt(0)
514; GFX11-NEXT:    ; return to shader part epilog
515;
516; NOPRT-LABEL: buffer_load_ofs:
517; NOPRT:       ; %bb.0: ; %main_body
518; NOPRT-NEXT:    s_mov_b32 s4, 0
519; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
520; NOPRT-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
521; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
522; NOPRT-NEXT:    s_waitcnt vmcnt(0)
523; NOPRT-NEXT:    ; return to shader part epilog
524;
525; GFX12-LABEL: buffer_load_ofs:
526; GFX12:       ; %bb.0: ; %main_body
527; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
528; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen
529; GFX12-NEXT:    s_wait_loadcnt 0x0
530; GFX12-NEXT:    ; return to shader part epilog
531main_body:
532  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %1, i32 0, i32 0)
533  ret <4 x float> %data
534}
535
; Same shape as a variable-offset load, but the third intrinsic argument is
; the parameter plus 60. The expected output contains no add instruction: the
; parameter is copied into v1 for "offen" and the constant appears as
; "offset:60" on the load itself.
536define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
537; GFX6-LABEL: buffer_load_ofs_imm:
538; GFX6:       ; %bb.0: ; %main_body
539; GFX6-NEXT:    s_mov_b32 s4, 0
540; GFX6-NEXT:    v_mov_b32_e32 v1, v0
541; GFX6-NEXT:    v_mov_b32_e32 v0, s4
542; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
543; GFX6-NEXT:    s_waitcnt vmcnt(0)
544; GFX6-NEXT:    ; return to shader part epilog
545;
546; GFX8PLUS-LABEL: buffer_load_ofs_imm:
547; GFX8PLUS:       ; %bb.0: ; %main_body
548; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
549; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
550; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
551; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
552; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
553; GFX8PLUS-NEXT:    ; return to shader part epilog
554;
555; GFX11-LABEL: buffer_load_ofs_imm:
556; GFX11:       ; %bb.0: ; %main_body
557; GFX11-NEXT:    s_mov_b32 s4, 0
558; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
559; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
560; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
561; GFX11-NEXT:    s_waitcnt vmcnt(0)
562; GFX11-NEXT:    ; return to shader part epilog
563;
564; NOPRT-LABEL: buffer_load_ofs_imm:
565; NOPRT:       ; %bb.0: ; %main_body
566; NOPRT-NEXT:    s_mov_b32 s4, 0
567; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
568; NOPRT-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
569; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
570; NOPRT-NEXT:    s_waitcnt vmcnt(0)
571; NOPRT-NEXT:    ; return to shader part epilog
572;
573; GFX12-LABEL: buffer_load_ofs_imm:
574; GFX12:       ; %bb.0: ; %main_body
575; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
576; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen offset:60
577; GFX12-NEXT:    s_wait_loadcnt 0x0
578; GFX12-NEXT:    ; return to shader part epilog
579main_body:
580  %ofs = add i32 %1, 60
581  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i32 0, i32 0)
582  ret <4 x float> %data
583}
584
; Single xyzw format load taking the two i32 parameters, in declaration
; order, as the second and third intrinsic arguments. Every run line expects
; the incoming v[0:1] pair to be used as-is with "idxen offen", with no
; register shuffling before the load.
585define amdgpu_ps <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) {
586; GFX6-LABEL: buffer_load_both:
587; GFX6:       ; %bb.0: ; %main_body
588; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
589; GFX6-NEXT:    s_waitcnt vmcnt(0)
590; GFX6-NEXT:    ; return to shader part epilog
591;
592; GFX8PLUS-LABEL: buffer_load_both:
593; GFX8PLUS:       ; %bb.0: ; %main_body
594; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
595; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
596; GFX8PLUS-NEXT:    ; return to shader part epilog
597;
598; GFX11-LABEL: buffer_load_both:
599; GFX11:       ; %bb.0: ; %main_body
600; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
601; GFX11-NEXT:    s_waitcnt vmcnt(0)
602; GFX11-NEXT:    ; return to shader part epilog
603;
604; NOPRT-LABEL: buffer_load_both:
605; NOPRT:       ; %bb.0: ; %main_body
606; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
607; NOPRT-NEXT:    s_waitcnt vmcnt(0)
608; NOPRT-NEXT:    ; return to shader part epilog
609;
610; GFX12-LABEL: buffer_load_both:
611; GFX12:       ; %bb.0: ; %main_body
612; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen
613; GFX12-NEXT:    s_wait_loadcnt 0x0
614; GFX12-NEXT:    ; return to shader part epilog
615main_body:
616  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 %2, i32 0, i32 0)
617  ret <4 x float> %data
618}
619
; Like the in-order two-parameter load, but the parameters are passed swapped
; (%2 as the second intrinsic argument, %1 as the third). The expected output
; needs one move, copying v0 into v2, so that v[1:2] forms the register pair
; used with "idxen offen".
620define amdgpu_ps <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) {
621; GFX6-LABEL: buffer_load_both_reversed:
622; GFX6:       ; %bb.0: ; %main_body
623; GFX6-NEXT:    v_mov_b32_e32 v2, v0
624; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
625; GFX6-NEXT:    s_waitcnt vmcnt(0)
626; GFX6-NEXT:    ; return to shader part epilog
627;
628; GFX8PLUS-LABEL: buffer_load_both_reversed:
629; GFX8PLUS:       ; %bb.0: ; %main_body
630; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, v0
631; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
632; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
633; GFX8PLUS-NEXT:    ; return to shader part epilog
634;
635; GFX11-LABEL: buffer_load_both_reversed:
636; GFX11:       ; %bb.0: ; %main_body
637; GFX11-NEXT:    v_mov_b32_e32 v2, v0
638; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
639; GFX11-NEXT:    s_waitcnt vmcnt(0)
640; GFX11-NEXT:    ; return to shader part epilog
641;
642; NOPRT-LABEL: buffer_load_both_reversed:
643; NOPRT:       ; %bb.0: ; %main_body
644; NOPRT-NEXT:    v_mov_b32_e32 v2, v0
645; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
646; NOPRT-NEXT:    s_waitcnt vmcnt(0)
647; NOPRT-NEXT:    ; return to shader part epilog
648;
649; GFX12-LABEL: buffer_load_both_reversed:
650; GFX12:       ; %bb.0: ; %main_body
651; GFX12-NEXT:    v_mov_b32_e32 v2, v0
652; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], null idxen offen
653; GFX12-NEXT:    s_wait_loadcnt 0x0
654; GFX12-NEXT:    ; return to shader part epilog
655main_body:
656  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 %2, i32 %1, i32 0, i32 0)
657  ret <4 x float> %data
658}
659
; Scalar variant: calls the .f32 overload of the intrinsic with all-zero
; operands and returns a single float. Every run line expects
; buffer_load_format_x writing one VGPR instead of the xyzw form used by the
; <4 x float> functions.
660define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) {
661; GFX6-LABEL: buffer_load_x:
662; GFX6:       ; %bb.0: ; %main_body
663; GFX6-NEXT:    v_mov_b32_e32 v0, 0
664; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
665; GFX6-NEXT:    s_waitcnt vmcnt(0)
666; GFX6-NEXT:    ; return to shader part epilog
667;
668; GFX8PLUS-LABEL: buffer_load_x:
669; GFX8PLUS:       ; %bb.0: ; %main_body
670; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
671; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
672; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
673; GFX8PLUS-NEXT:    ; return to shader part epilog
674;
675; GFX11-LABEL: buffer_load_x:
676; GFX11:       ; %bb.0: ; %main_body
677; GFX11-NEXT:    v_mov_b32_e32 v0, 0
678; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
679; GFX11-NEXT:    s_waitcnt vmcnt(0)
680; GFX11-NEXT:    ; return to shader part epilog
681;
682; NOPRT-LABEL: buffer_load_x:
683; NOPRT:       ; %bb.0: ; %main_body
684; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
685; NOPRT-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
686; NOPRT-NEXT:    s_waitcnt vmcnt(0)
687; NOPRT-NEXT:    ; return to shader part epilog
688;
689; GFX12-LABEL: buffer_load_x:
690; GFX12:       ; %bb.0: ; %main_body
691; GFX12-NEXT:    v_mov_b32_e32 v0, 0
692; GFX12-NEXT:    buffer_load_format_x v0, v0, s[0:3], null idxen
693; GFX12-NEXT:    s_wait_loadcnt 0x0
694; GFX12-NEXT:    ; return to shader part epilog
695main_body:
696  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
697  ret float %data
698}
699
; Integer flavour of the single-component case: calls the i32 overload with
; all-zero i32 operands and bitcasts the result to float for the return value.
; The expected instructions are the same as for the f32 overload.
define amdgpu_ps float @buffer_load_x_i32(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: buffer_load_x_i32:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_x_i32:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x_i32:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_x_i32:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_x_i32:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_x v0, v0, s[0:3], null idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %fdata = bitcast i32 %data to float
  ret float %fdata
}
740
; Two-component case: calls the v2f32 overload with all-zero i32 operands and
; returns the <2 x float>. The expected output is one buffer_load_format_xy
; into v[0:1] with idxen.
define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: buffer_load_xy:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_xy:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_xy:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_xy:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_xy:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], null idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret <2 x float> %data
}
780
; Struct-returning overload { <4 x i32>, i32 }: the vector element is stored to
; %out and the trailing i32 (%status) is returned bitcast to float. The second
; operand of the call is the constant 2, so the expected output materializes 2
; in a VGPR for the load. The expected loads carry the tfe modifier and write
; five registers (v[2:6]). All configurations except the one run with
; -enable-prt-strict-null disabled zero the whole destination range before the
; load; that one only zeroes the last register (v6).
define amdgpu_cs float @buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v4i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v7, 2
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    v_mov_b32_e32 v6, v2
; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v7, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v6
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v4i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v7, 2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v6, v2
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v7, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v4i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v7, 2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    v_mov_b32_e32 v5, v2
; GFX11-NEXT:    v_mov_b32_e32 v6, v2
; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v7, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v4i32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v2, 2
; NOPRT-NEXT:    v_mov_b32_e32 v6, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b128 v[0:1], v[2:5], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v6
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_v4i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v7, 2
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2
; GFX12-NEXT:    v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v6, v2
; GFX12-NEXT:    buffer_load_format_xyzw v[2:6], v7, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX12-NEXT:    v_mov_b32_e32 v0, v6
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 2, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x i32>, i32 } %load, 0
  store <4 x i32> %data, ptr addrspace(1) %out
  %status = extractvalue { <4 x i32>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}
858
; Struct-returning overload { <4 x float>, i32 } with all-zero i32 operands:
; the vector element is stored to %out and the trailing i32 (%status) is
; returned bitcast to float. The expected loads carry the tfe modifier and
; write five registers (v[2:6]); the run with -enable-prt-strict-null disabled
; only zeroes v6 beforehand, the others zero the whole range.
define amdgpu_cs float @buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v4f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    v_mov_b32_e32 v6, v2
; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v6
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v4f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v6, v2
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v4f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    v_mov_b32_e32 v5, v2
; GFX11-NEXT:    v_mov_b32_e32 v6, v2
; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v4f32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v6, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[2:6], v6, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b128 v[0:1], v[2:5], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v6
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_v4f32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2
; GFX12-NEXT:    v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v6, v2
; GFX12-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX12-NEXT:    v_mov_b32_e32 v0, v6
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x float>, i32 } %load, 0
  store <4 x float> %data, ptr addrspace(1) %out
  %status = extractvalue { <4 x float>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}
933
; Struct-returning overload { <3 x i32>, i32 } with all-zero i32 operands: the
; vector element is stored to %out and the trailing i32 (%status) is returned
; bitcast to float. The expected loads are buffer_load_format_xyz with tfe into
; four registers (v[2:5]). The verde (GFX6) expectations store the three data
; dwords with a dword store at offset 8 plus a dwordx2 store; the other targets
; use one three-dword store.
define amdgpu_cs float @buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v3i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v5
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v3i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v3i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    v_mov_b32_e32 v5, v2
; GFX11-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v5
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v3i32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v5, 0
; NOPRT-NEXT:    buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b96 v[0:1], v[2:4], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v5
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_v3i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2
; GFX12-NEXT:    v_mov_b32_e32 v5, v2
; GFX12-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-NEXT:    v_mov_b32_e32 v0, v5
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <3 x i32>, i32 } %load, 0
  store <3 x i32> %data, ptr addrspace(1) %out
  %status = extractvalue { <3 x i32>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}
1006
; Struct-returning overload { <3 x float>, i32 } with all-zero i32 operands:
; the vector element is stored to %out and the trailing i32 (%status) is
; returned bitcast to float. The expected instructions match the <3 x i32>
; variant: buffer_load_format_xyz with tfe into v[2:5], with the status taken
; from v5.
define amdgpu_cs float @buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v3f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v5
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v3f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v3f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    v_mov_b32_e32 v5, v2
; GFX11-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v5
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v3f32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v5, 0
; NOPRT-NEXT:    buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b96 v[0:1], v[2:4], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v5
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_v3f32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2
; GFX12-NEXT:    v_mov_b32_e32 v5, v2
; GFX12-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-NEXT:    v_mov_b32_e32 v0, v5
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <3 x float>, i32 } %load, 0
  store <3 x float> %data, ptr addrspace(1) %out
  %status = extractvalue { <3 x float>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}
1079
; Struct-returning overload { <2 x i32>, i32 } with all-zero i32 operands: the
; vector element is stored to %out and the trailing i32 (%status) is returned
; bitcast to float. Most targets are expected to emit buffer_load_format_xy
; with tfe into v[2:4] and return v4.
; NOTE(review): the verde (GFX6) expectations instead load with
; buffer_load_format_xyz into v[2:5] yet still return v4. The directives are
; generated output and are kept as-is; confirm this is the intended codegen.
define amdgpu_cs float @buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v2i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v4
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v2i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v2i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v4
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v2i32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
; NOPRT-NEXT:    buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b64 v[0:1], v[2:3], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v4
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_v2i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2
; GFX12-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX12-NEXT:    v_mov_b32_e32 v0, v4
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <2 x i32>, i32 } %load, 0
  store <2 x i32> %data, ptr addrspace(1) %out
  %status = extractvalue { <2 x i32>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}
1148
; Struct-returning overload { <2 x float>, i32 } with all-zero i32 operands:
; the vector element is stored to %out and the trailing i32 (%status) is
; returned bitcast to float. Most targets are expected to emit
; buffer_load_format_xy with tfe into v[2:4] and return v4.
; NOTE(review): the verde (GFX6) expectations instead load with
; buffer_load_format_xyz into v[2:5] yet still return v4. The directives are
; generated output and are kept as-is; confirm this is the intended codegen.
define amdgpu_cs float @buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v2f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v4
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v2f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v2f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v4
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v2f32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v4, 0
; NOPRT-NEXT:    buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b64 v[0:1], v[2:3], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v4
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_v2f32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2
; GFX12-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX12-NEXT:    v_mov_b32_e32 v0, v4
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <2 x float>, i32 } %load, 0
  store <2 x float> %data, ptr addrspace(1) %out
  %status = extractvalue { <2 x float>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}
1217
; Struct-returning overload { i32, i32 } with all-zero i32 operands: the first
; element is stored to %out and the second (%status) is returned bitcast to
; float. The expected loads are buffer_load_format_x with tfe into a register
; pair (v[2:3]); v2 is stored and v3 is returned.
define amdgpu_cs float @buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v3
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dword v[0:1], v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v3
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    v_mov_b32_e32 v0, v3
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_i32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
; NOPRT-NEXT:    buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b32 v[0:1], v2, off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v3, v2
; GFX12-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-NEXT:    v_mov_b32_e32 v0, v3
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { i32, i32 } %load, 0
  store i32 %data, ptr addrspace(1) %out
  %status = extractvalue { i32, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}
1282
; Struct-returning overload { float, i32 } with all-zero i32 operands: the
; float element is stored to %out and the i32 (%status) is returned bitcast to
; float. The expected instructions match the { i32, i32 } variant:
; buffer_load_format_x with tfe into v[2:3], v2 stored and v3 returned.
define amdgpu_cs float @buffer_load_f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v3
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dword v[0:1], v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v3
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    v_mov_b32_e32 v0, v3
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_f32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v3, 0
; NOPRT-NEXT:    buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b32 v[0:1], v2, off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v3
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_f32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_mov_b32_e32 v3, v2
; GFX12-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-NEXT:    v_mov_b32_e32 v0, v3
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { float, i32 } %load, 0
  store float %data, ptr addrspace(1) %out
  %status = extractvalue { float, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}
1347
; Declarations of the intrinsic overloads called above. The struct-returning
; (sl_*) overloads mark their final i32 parameter immarg; the four plain
; overloads are declared without it. All of them share attribute group #0.
declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #0
declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #0
declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32) #0
declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
declare { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
attributes #0 = { nounwind readonly }
1361