xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll (revision faa2c678aa1963147af35c3700e6b44c264af99f)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GFX6 %s
3;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GFX8PLUS %s
4;RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=GFX11 %s
5
; buffer_load: issues three v4f32 struct.ptr.buffer.load.format calls on the
; same inreg resource with all-zero index/offset operands; only the last
; operand differs (0, 1, 2). The checks expect, in that order, a plain load,
; a load with glc and a load with slc, each using idxen with a zeroed VGPR.
6define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
7; GFX6-LABEL: buffer_load:
8; GFX6:       ; %bb.0: ; %main_body
9; GFX6-NEXT:    v_mov_b32_e32 v8, 0
10; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
11; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
12; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
13; GFX6-NEXT:    s_waitcnt vmcnt(0)
14; GFX6-NEXT:    ; return to shader part epilog
15;
16; GFX8PLUS-LABEL: buffer_load:
17; GFX8PLUS:       ; %bb.0: ; %main_body
18; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
19; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
20; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
21; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
22; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
23; GFX8PLUS-NEXT:    ; return to shader part epilog
24;
25; GFX11-LABEL: buffer_load:
26; GFX11:       ; %bb.0: ; %main_body
27; GFX11-NEXT:    v_mov_b32_e32 v8, 0
28; GFX11-NEXT:    s_clause 0x2
29; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
30; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
31; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
32; GFX11-NEXT:    s_waitcnt vmcnt(0)
33; GFX11-NEXT:    ; return to shader part epilog
34main_body:
35  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
36  %data_glc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
37  %data_slc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
38  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
39  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
40  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
41  ret {<4 x float>, <4 x float>, <4 x float>} %r2
42}
43
; buffer_load_immoffs: passes the constant 42 as the third intrinsic operand.
; The checks expect it to appear as the instruction's immediate (offset:42)
; on an idxen load with a zeroed VGPR index, on all three targets.
44define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
45; GFX6-LABEL: buffer_load_immoffs:
46; GFX6:       ; %bb.0: ; %main_body
47; GFX6-NEXT:    v_mov_b32_e32 v0, 0
48; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
49; GFX6-NEXT:    s_waitcnt vmcnt(0)
50; GFX6-NEXT:    ; return to shader part epilog
51;
52; GFX8PLUS-LABEL: buffer_load_immoffs:
53; GFX8PLUS:       ; %bb.0: ; %main_body
54; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
55; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
56; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
57; GFX8PLUS-NEXT:    ; return to shader part epilog
58;
59; GFX11-LABEL: buffer_load_immoffs:
60; GFX11:       ; %bb.0: ; %main_body
61; GFX11-NEXT:    v_mov_b32_e32 v0, 0
62; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
63; GFX11-NEXT:    s_waitcnt vmcnt(0)
64; GFX11-NEXT:    ; return to shader part epilog
65main_body:
66  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 42, i32 0, i32 0)
67  ret <4 x float> %data
68}
69
; buffer_load_immoffs_large: three loads combining a constant third operand
; (4092, 4092, 4) with a constant fourth operand (60, 32764, 36860), whose
; results are summed with fadd. The checks expect the third operand as the
; offset: immediate, 60 used directly as the scalar offset operand, and
; 32764 (0x7ffc) / 36860 (0x8ffc) materialized into s4 first.
70define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
71; GFX6-LABEL: buffer_load_immoffs_large:
72; GFX6:       ; %bb.0: ; %main_body
73; GFX6-NEXT:    v_mov_b32_e32 v8, 0
74; GFX6-NEXT:    s_movk_i32 s4, 0x7ffc
75; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
76; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
77; GFX6-NEXT:    s_mov_b32 s4, 0x8ffc
78; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
79; GFX6-NEXT:    s_waitcnt vmcnt(1)
80; GFX6-NEXT:    v_add_f32_e32 v3, v3, v7
81; GFX6-NEXT:    v_add_f32_e32 v2, v2, v6
82; GFX6-NEXT:    v_add_f32_e32 v1, v1, v5
83; GFX6-NEXT:    v_add_f32_e32 v0, v0, v4
84; GFX6-NEXT:    s_waitcnt vmcnt(0)
85; GFX6-NEXT:    v_add_f32_e32 v0, v8, v0
86; GFX6-NEXT:    v_add_f32_e32 v1, v9, v1
87; GFX6-NEXT:    v_add_f32_e32 v2, v10, v2
88; GFX6-NEXT:    v_add_f32_e32 v3, v11, v3
89; GFX6-NEXT:    ; return to shader part epilog
90;
91; GFX8PLUS-LABEL: buffer_load_immoffs_large:
92; GFX8PLUS:       ; %bb.0: ; %main_body
93; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
94; GFX8PLUS-NEXT:    s_movk_i32 s4, 0x7ffc
95; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
96; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
97; GFX8PLUS-NEXT:    s_mov_b32 s4, 0x8ffc
98; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
99; GFX8PLUS-NEXT:    s_waitcnt vmcnt(1)
100; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v3, v7
101; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v2, v6
102; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v1, v5
103; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v0, v4
104; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
105; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v8, v0
106; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v9, v1
107; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v10, v2
108; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v11, v3
109; GFX8PLUS-NEXT:    ; return to shader part epilog
110;
111; GFX11-LABEL: buffer_load_immoffs_large:
112; GFX11:       ; %bb.0: ; %main_body
113; GFX11-NEXT:    v_mov_b32_e32 v8, 0
114; GFX11-NEXT:    s_movk_i32 s4, 0x7ffc
115; GFX11-NEXT:    s_clause 0x1
116; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
117; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
118; GFX11-NEXT:    s_mov_b32 s4, 0x8ffc
119; GFX11-NEXT:    s_waitcnt vmcnt(0)
120; GFX11-NEXT:    v_add_f32_e32 v1, v1, v5
121; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
122; GFX11-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
123; GFX11-NEXT:    s_waitcnt vmcnt(0)
124; GFX11-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
125; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
126; GFX11-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
127; GFX11-NEXT:    v_add_f32_e32 v2, v10, v2
128; GFX11-NEXT:    ; return to shader part epilog
129main_body:
130  %d.0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 60, i32 0)
131  %d.1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 32764, i32 0)
132  %d.2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4, i32 36860, i32 0)
133  %d.3 = fadd <4 x float> %d.0, %d.1
134  %data = fadd <4 x float> %d.2, %d.3
135  ret <4 x float> %data
136}
137
; buffer_load_voffset_large_12bit: constant third operand 4092. The checks
; expect the whole value to be carried by the offset: immediate (offset:4092)
; with no offen operand, on all three targets.
138define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
139; GFX6-LABEL: buffer_load_voffset_large_12bit:
140; GFX6:       ; %bb.0: ; %main_body
141; GFX6-NEXT:    v_mov_b32_e32 v0, 0
142; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
143; GFX6-NEXT:    s_waitcnt vmcnt(0)
144; GFX6-NEXT:    ; return to shader part epilog
145;
146; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
147; GFX8PLUS:       ; %bb.0: ; %main_body
148; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
149; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
150; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
151; GFX8PLUS-NEXT:    ; return to shader part epilog
152;
153; GFX11-LABEL: buffer_load_voffset_large_12bit:
154; GFX11:       ; %bb.0: ; %main_body
155; GFX11-NEXT:    v_mov_b32_e32 v0, 0
156; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
157; GFX11-NEXT:    s_waitcnt vmcnt(0)
158; GFX11-NEXT:    ; return to shader part epilog
159main_body:
160  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 0, i32 0)
161  ret <4 x float> %data
162}
163
; buffer_load_voffset_large_13bit: constant third operand 8188
; (= 0x1000 + 4092). The checks expect the value to be split: 0x1000 goes
; into the second register of the v[0:1] idxen+offen pair, and the remaining
; 4092 stays in the offset: immediate.
164define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
165; GFX6-LABEL: buffer_load_voffset_large_13bit:
166; GFX6:       ; %bb.0: ; %main_body
167; GFX6-NEXT:    s_mov_b32 s4, 0
168; GFX6-NEXT:    v_mov_b32_e32 v1, 0x1000
169; GFX6-NEXT:    v_mov_b32_e32 v0, s4
170; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
171; GFX6-NEXT:    s_waitcnt vmcnt(0)
172; GFX6-NEXT:    ; return to shader part epilog
173;
174; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
175; GFX8PLUS:       ; %bb.0: ; %main_body
176; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
177; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x1000
178; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
179; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
180; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
181; GFX8PLUS-NEXT:    ; return to shader part epilog
182;
183; GFX11-LABEL: buffer_load_voffset_large_13bit:
184; GFX11:       ; %bb.0: ; %main_body
185; GFX11-NEXT:    s_mov_b32 s4, 0
186; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
187; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
188; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
189; GFX11-NEXT:    s_waitcnt vmcnt(0)
190; GFX11-NEXT:    ; return to shader part epilog
191main_body:
192  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8188, i32 0, i32 0)
193  ret <4 x float> %data
194}
195
; buffer_load_voffset_large_16bit: constant third operand 65532
; (= 0xf000 + 4092). The checks expect 0xf000 in the second register of the
; v[0:1] idxen+offen pair and the remaining 4092 in the offset: immediate.
196define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
197; GFX6-LABEL: buffer_load_voffset_large_16bit:
198; GFX6:       ; %bb.0: ; %main_body
199; GFX6-NEXT:    s_mov_b32 s4, 0
200; GFX6-NEXT:    v_mov_b32_e32 v1, 0xf000
201; GFX6-NEXT:    v_mov_b32_e32 v0, s4
202; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
203; GFX6-NEXT:    s_waitcnt vmcnt(0)
204; GFX6-NEXT:    ; return to shader part epilog
205;
206; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
207; GFX8PLUS:       ; %bb.0: ; %main_body
208; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
209; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xf000
210; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
211; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
212; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
213; GFX8PLUS-NEXT:    ; return to shader part epilog
214;
215; GFX11-LABEL: buffer_load_voffset_large_16bit:
216; GFX11:       ; %bb.0: ; %main_body
217; GFX11-NEXT:    s_mov_b32 s4, 0
218; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
219; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
220; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
221; GFX11-NEXT:    s_waitcnt vmcnt(0)
222; GFX11-NEXT:    ; return to shader part epilog
223main_body:
224  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 65532, i32 0, i32 0)
225  ret <4 x float> %data
226}
227
; buffer_load_voffset_large_23bit: constant third operand 8388604
; (= 0x7ff000 + 4092). The checks expect 0x7ff000 in the second register of
; the v[0:1] idxen+offen pair and the remaining 4092 in the offset: immediate.
228define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
229; GFX6-LABEL: buffer_load_voffset_large_23bit:
230; GFX6:       ; %bb.0: ; %main_body
231; GFX6-NEXT:    s_mov_b32 s4, 0
232; GFX6-NEXT:    v_mov_b32_e32 v1, 0x7ff000
233; GFX6-NEXT:    v_mov_b32_e32 v0, s4
234; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
235; GFX6-NEXT:    s_waitcnt vmcnt(0)
236; GFX6-NEXT:    ; return to shader part epilog
237;
238; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
239; GFX8PLUS:       ; %bb.0: ; %main_body
240; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
241; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x7ff000
242; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
243; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
244; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
245; GFX8PLUS-NEXT:    ; return to shader part epilog
246;
247; GFX11-LABEL: buffer_load_voffset_large_23bit:
248; GFX11:       ; %bb.0: ; %main_body
249; GFX11-NEXT:    s_mov_b32 s4, 0
250; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
251; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
252; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
253; GFX11-NEXT:    s_waitcnt vmcnt(0)
254; GFX11-NEXT:    ; return to shader part epilog
255main_body:
256  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8388604, i32 0, i32 0)
257  ret <4 x float> %data
258}
259
; buffer_load_voffset_large_24bit: constant third operand 16777212
; (= 0xfff000 + 4092). The checks expect 0xfff000 in the second register of
; the v[0:1] idxen+offen pair and the remaining 4092 in the offset: immediate.
260define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
261; GFX6-LABEL: buffer_load_voffset_large_24bit:
262; GFX6:       ; %bb.0: ; %main_body
263; GFX6-NEXT:    s_mov_b32 s4, 0
264; GFX6-NEXT:    v_mov_b32_e32 v1, 0xfff000
265; GFX6-NEXT:    v_mov_b32_e32 v0, s4
266; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
267; GFX6-NEXT:    s_waitcnt vmcnt(0)
268; GFX6-NEXT:    ; return to shader part epilog
269;
270; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
271; GFX8PLUS:       ; %bb.0: ; %main_body
272; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
273; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xfff000
274; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
275; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
276; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
277; GFX8PLUS-NEXT:    ; return to shader part epilog
278;
279; GFX11-LABEL: buffer_load_voffset_large_24bit:
280; GFX11:       ; %bb.0: ; %main_body
281; GFX11-NEXT:    s_mov_b32 s4, 0
282; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
283; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
284; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
285; GFX11-NEXT:    s_waitcnt vmcnt(0)
286; GFX11-NEXT:    ; return to shader part epilog
287main_body:
288  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 16777212, i32 0, i32 0)
289  ret <4 x float> %data
290}
291
; buffer_load_idx: passes the non-inreg i32 argument as the second intrinsic
; operand and zeros for the rest. The checks expect the incoming v0 to be
; used directly as the idxen operand with no extra setup instructions.
292define amdgpu_ps <4 x float> @buffer_load_idx(ptr addrspace(8) inreg, i32) {
293; GFX6-LABEL: buffer_load_idx:
294; GFX6:       ; %bb.0: ; %main_body
295; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
296; GFX6-NEXT:    s_waitcnt vmcnt(0)
297; GFX6-NEXT:    ; return to shader part epilog
298;
299; GFX8PLUS-LABEL: buffer_load_idx:
300; GFX8PLUS:       ; %bb.0: ; %main_body
301; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
302; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
303; GFX8PLUS-NEXT:    ; return to shader part epilog
304;
305; GFX11-LABEL: buffer_load_idx:
306; GFX11:       ; %bb.0: ; %main_body
307; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
308; GFX11-NEXT:    s_waitcnt vmcnt(0)
309; GFX11-NEXT:    ; return to shader part epilog
310main_body:
311  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0, i32 0)
312  ret <4 x float> %data
313}
314
; buffer_load_ofs: passes the non-inreg i32 argument as the third intrinsic
; operand with a zero second operand. The checks expect an idxen+offen load
; on a v[0:1] pair: the argument is copied from v0 into v1 and v0 is then
; overwritten with zero (via s4).
315define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
316; GFX6-LABEL: buffer_load_ofs:
317; GFX6:       ; %bb.0: ; %main_body
318; GFX6-NEXT:    s_mov_b32 s4, 0
319; GFX6-NEXT:    v_mov_b32_e32 v1, v0
320; GFX6-NEXT:    v_mov_b32_e32 v0, s4
321; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
322; GFX6-NEXT:    s_waitcnt vmcnt(0)
323; GFX6-NEXT:    ; return to shader part epilog
324;
325; GFX8PLUS-LABEL: buffer_load_ofs:
326; GFX8PLUS:       ; %bb.0: ; %main_body
327; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
328; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
329; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
330; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
331; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
332; GFX8PLUS-NEXT:    ; return to shader part epilog
333;
334; GFX11-LABEL: buffer_load_ofs:
335; GFX11:       ; %bb.0: ; %main_body
336; GFX11-NEXT:    s_mov_b32 s4, 0
337; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
338; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
339; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
340; GFX11-NEXT:    s_waitcnt vmcnt(0)
341; GFX11-NEXT:    ; return to shader part epilog
342main_body:
343  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %1, i32 0, i32 0)
344  ret <4 x float> %data
345}
346
; buffer_load_ofs_imm: the third intrinsic operand is the i32 argument plus
; the constant 60. The checks expect no separate add: the argument is used
; as the offen register (v1) and the 60 is folded into offset:60.
347define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
348; GFX6-LABEL: buffer_load_ofs_imm:
349; GFX6:       ; %bb.0: ; %main_body
350; GFX6-NEXT:    s_mov_b32 s4, 0
351; GFX6-NEXT:    v_mov_b32_e32 v1, v0
352; GFX6-NEXT:    v_mov_b32_e32 v0, s4
353; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
354; GFX6-NEXT:    s_waitcnt vmcnt(0)
355; GFX6-NEXT:    ; return to shader part epilog
356;
357; GFX8PLUS-LABEL: buffer_load_ofs_imm:
358; GFX8PLUS:       ; %bb.0: ; %main_body
359; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
360; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
361; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
362; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
363; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
364; GFX8PLUS-NEXT:    ; return to shader part epilog
365;
366; GFX11-LABEL: buffer_load_ofs_imm:
367; GFX11:       ; %bb.0: ; %main_body
368; GFX11-NEXT:    s_mov_b32 s4, 0
369; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
370; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
371; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
372; GFX11-NEXT:    s_waitcnt vmcnt(0)
373; GFX11-NEXT:    ; return to shader part epilog
374main_body:
375  %ofs = add i32 %1, 60
376  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
377  ret <4 x float> %data
378}
379
; buffer_load_both: passes the two i32 arguments, in argument order, as the
; second and third intrinsic operands. The checks expect the incoming v[0:1]
; pair to be used as-is for an idxen+offen load, with no register copies.
380define amdgpu_ps <4 x float> @buffer_load_both(ptr addrspace(8) inreg, i32, i32) {
381; GFX6-LABEL: buffer_load_both:
382; GFX6:       ; %bb.0: ; %main_body
383; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
384; GFX6-NEXT:    s_waitcnt vmcnt(0)
385; GFX6-NEXT:    ; return to shader part epilog
386;
387; GFX8PLUS-LABEL: buffer_load_both:
388; GFX8PLUS:       ; %bb.0: ; %main_body
389; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
390; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
391; GFX8PLUS-NEXT:    ; return to shader part epilog
392;
393; GFX11-LABEL: buffer_load_both:
394; GFX11:       ; %bb.0: ; %main_body
395; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
396; GFX11-NEXT:    s_waitcnt vmcnt(0)
397; GFX11-NEXT:    ; return to shader part epilog
398main_body:
399  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 %2, i32 0, i32 0)
400  ret <4 x float> %data
401}
402
; buffer_load_both_reversed: same as buffer_load_both but with the two i32
; arguments swapped (%2 as second operand, %1 as third). The checks expect a
; single copy of v0 into v2 so that v[1:2] forms the idxen+offen pair.
403define amdgpu_ps <4 x float> @buffer_load_both_reversed(ptr addrspace(8) inreg, i32, i32) {
404; GFX6-LABEL: buffer_load_both_reversed:
405; GFX6:       ; %bb.0: ; %main_body
406; GFX6-NEXT:    v_mov_b32_e32 v2, v0
407; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
408; GFX6-NEXT:    s_waitcnt vmcnt(0)
409; GFX6-NEXT:    ; return to shader part epilog
410;
411; GFX8PLUS-LABEL: buffer_load_both_reversed:
412; GFX8PLUS:       ; %bb.0: ; %main_body
413; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, v0
414; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
415; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
416; GFX8PLUS-NEXT:    ; return to shader part epilog
417;
418; GFX11-LABEL: buffer_load_both_reversed:
419; GFX11:       ; %bb.0: ; %main_body
420; GFX11-NEXT:    v_mov_b32_e32 v2, v0
421; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
422; GFX11-NEXT:    s_waitcnt vmcnt(0)
423; GFX11-NEXT:    ; return to shader part epilog
424main_body:
425  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %2, i32 %1, i32 0, i32 0)
426  ret <4 x float> %data
427}
428
; buffer_load_x: scalar float variant (.f32) of the intrinsic with all-zero
; index/offset operands. The checks expect a single-component
; buffer_load_format_x into v0.
429define amdgpu_ps float @buffer_load_x(ptr addrspace(8) inreg %rsrc) {
430; GFX6-LABEL: buffer_load_x:
431; GFX6:       ; %bb.0: ; %main_body
432; GFX6-NEXT:    v_mov_b32_e32 v0, 0
433; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
434; GFX6-NEXT:    s_waitcnt vmcnt(0)
435; GFX6-NEXT:    ; return to shader part epilog
436;
437; GFX8PLUS-LABEL: buffer_load_x:
438; GFX8PLUS:       ; %bb.0: ; %main_body
439; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
440; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
441; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
442; GFX8PLUS-NEXT:    ; return to shader part epilog
443;
444; GFX11-LABEL: buffer_load_x:
445; GFX11:       ; %bb.0: ; %main_body
446; GFX11-NEXT:    v_mov_b32_e32 v0, 0
447; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
448; GFX11-NEXT:    s_waitcnt vmcnt(0)
449; GFX11-NEXT:    ; return to shader part epilog
450main_body:
451  %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
452  ret float %data
453}
454
; buffer_load_x_i32: scalar i32 variant (.i32) of the intrinsic; the result
; is bitcast to float only to be returned. The checks expect the same
; buffer_load_format_x sequence as the float variant, with no extra
; instruction for the bitcast.
455define amdgpu_ps float @buffer_load_x_i32(ptr addrspace(8) inreg %rsrc) {
456; GFX6-LABEL: buffer_load_x_i32:
457; GFX6:       ; %bb.0: ; %main_body
458; GFX6-NEXT:    v_mov_b32_e32 v0, 0
459; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
460; GFX6-NEXT:    s_waitcnt vmcnt(0)
461; GFX6-NEXT:    ; return to shader part epilog
462;
463; GFX8PLUS-LABEL: buffer_load_x_i32:
464; GFX8PLUS:       ; %bb.0: ; %main_body
465; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
466; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
467; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
468; GFX8PLUS-NEXT:    ; return to shader part epilog
469;
470; GFX11-LABEL: buffer_load_x_i32:
471; GFX11:       ; %bb.0: ; %main_body
472; GFX11-NEXT:    v_mov_b32_e32 v0, 0
473; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
474; GFX11-NEXT:    s_waitcnt vmcnt(0)
475; GFX11-NEXT:    ; return to shader part epilog
476main_body:
477  %data = call i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
478  %fdata = bitcast i32 %data to float
479  ret float %fdata
480}
481
; buffer_load_xy: two-element float variant (.v2f32) of the intrinsic with
; all-zero index/offset operands. The checks expect a two-component
; buffer_load_format_xy into v[0:1].
482define amdgpu_ps <2 x float> @buffer_load_xy(ptr addrspace(8) inreg %rsrc) {
483; GFX6-LABEL: buffer_load_xy:
484; GFX6:       ; %bb.0: ; %main_body
485; GFX6-NEXT:    v_mov_b32_e32 v0, 0
486; GFX6-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
487; GFX6-NEXT:    s_waitcnt vmcnt(0)
488; GFX6-NEXT:    ; return to shader part epilog
489;
490; GFX8PLUS-LABEL: buffer_load_xy:
491; GFX8PLUS:       ; %bb.0: ; %main_body
492; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
493; GFX8PLUS-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
494; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
495; GFX8PLUS-NEXT:    ; return to shader part epilog
496;
497; GFX11-LABEL: buffer_load_xy:
498; GFX11:       ; %bb.0: ; %main_body
499; GFX11-NEXT:    v_mov_b32_e32 v0, 0
500; GFX11-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
501; GFX11-NEXT:    s_waitcnt vmcnt(0)
502; GFX11-NEXT:    ; return to shader part epilog
503main_body:
504  %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
505  ret <2 x float> %data
506}
507
; buffer_load_v4i32_tfe: struct-returning variant { <4 x i32>, i32 }. The
; vector element is stored to %out and the i32 element (%status in the IR)
; is bitcast to float and returned. The checks expect a tfe load writing five
; registers v[2:6]: v[2:5] feed the store and v6 is moved to v0 as the result.
508define amdgpu_cs float @buffer_load_v4i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
509; GFX6-LABEL: buffer_load_v4i32_tfe:
510; GFX6:       ; %bb.0:
511; GFX6-NEXT:    v_mov_b32_e32 v2, 0
512; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
513; GFX6-NEXT:    s_mov_b32 s2, 0
514; GFX6-NEXT:    s_mov_b32 s3, 0xf000
515; GFX6-NEXT:    s_mov_b32 s0, s2
516; GFX6-NEXT:    s_mov_b32 s1, s2
517; GFX6-NEXT:    s_waitcnt vmcnt(0)
518; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
519; GFX6-NEXT:    v_mov_b32_e32 v0, v6
520; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
521; GFX6-NEXT:    ; return to shader part epilog
522;
523; GFX8PLUS-LABEL: buffer_load_v4i32_tfe:
524; GFX8PLUS:       ; %bb.0:
525; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
526; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
527; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
528; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
529; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
530; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
531; GFX8PLUS-NEXT:    ; return to shader part epilog
532;
533; GFX11-LABEL: buffer_load_v4i32_tfe:
534; GFX11:       ; %bb.0:
535; GFX11-NEXT:    v_mov_b32_e32 v2, 0
536; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
537; GFX11-NEXT:    s_waitcnt vmcnt(0)
538; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
539; GFX11-NEXT:    v_mov_b32_e32 v0, v6
540; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
541; GFX11-NEXT:    ; return to shader part epilog
542  %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
543  %data = extractvalue { <4 x i32>, i32 } %load, 0
544  store <4 x i32> %data, ptr addrspace(1) %out
545  %status = extractvalue { <4 x i32>, i32 } %load, 1
546  %fstatus = bitcast i32 %status to float
547  ret float %fstatus
548}
549
; buffer_load_v4f32_tfe: struct-returning variant { <4 x float>, i32 }. The
; vector element is stored to %out and the i32 element (%status in the IR)
; is bitcast to float and returned. The checks expect the same code as the
; v4i32 variant: a tfe load into v[2:6], a store of v[2:5], and v6 -> v0.
550define amdgpu_cs float @buffer_load_v4f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
551; GFX6-LABEL: buffer_load_v4f32_tfe:
552; GFX6:       ; %bb.0:
553; GFX6-NEXT:    v_mov_b32_e32 v2, 0
554; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
555; GFX6-NEXT:    s_mov_b32 s2, 0
556; GFX6-NEXT:    s_mov_b32 s3, 0xf000
557; GFX6-NEXT:    s_mov_b32 s0, s2
558; GFX6-NEXT:    s_mov_b32 s1, s2
559; GFX6-NEXT:    s_waitcnt vmcnt(0)
560; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
561; GFX6-NEXT:    v_mov_b32_e32 v0, v6
562; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
563; GFX6-NEXT:    ; return to shader part epilog
564;
565; GFX8PLUS-LABEL: buffer_load_v4f32_tfe:
566; GFX8PLUS:       ; %bb.0:
567; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
568; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
569; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
570; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
571; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
572; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
573; GFX8PLUS-NEXT:    ; return to shader part epilog
574;
575; GFX11-LABEL: buffer_load_v4f32_tfe:
576; GFX11:       ; %bb.0:
577; GFX11-NEXT:    v_mov_b32_e32 v2, 0
578; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
579; GFX11-NEXT:    s_waitcnt vmcnt(0)
580; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
581; GFX11-NEXT:    v_mov_b32_e32 v0, v6
582; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
583; GFX11-NEXT:    ; return to shader part epilog
584  %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
585  %data = extractvalue { <4 x float>, i32 } %load, 0
586  store <4 x float> %data, ptr addrspace(1) %out
587  %status = extractvalue { <4 x float>, i32 } %load, 1
588  %fstatus = bitcast i32 %status to float
589  ret float %fstatus
590}
591
; buffer_load_v3i32_tfe: struct-returning variant { <3 x i32>, i32 }. The
; vector element is stored to %out and the i32 element (%status in the IR)
; is bitcast to float and returned. The checks expect a tfe xyz load writing
; four registers v[2:5]: v[2:4] are stored and v5 is moved to v0. For the
; verde run the store is expected as a dword at offset:8 plus a dwordx2.
592define amdgpu_cs float @buffer_load_v3i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
593; GFX6-LABEL: buffer_load_v3i32_tfe:
594; GFX6:       ; %bb.0:
595; GFX6-NEXT:    v_mov_b32_e32 v2, 0
596; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
597; GFX6-NEXT:    s_mov_b32 s2, 0
598; GFX6-NEXT:    s_mov_b32 s3, 0xf000
599; GFX6-NEXT:    s_mov_b32 s0, s2
600; GFX6-NEXT:    s_mov_b32 s1, s2
601; GFX6-NEXT:    s_waitcnt vmcnt(0)
602; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
603; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
604; GFX6-NEXT:    v_mov_b32_e32 v0, v5
605; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
606; GFX6-NEXT:    ; return to shader part epilog
607;
608; GFX8PLUS-LABEL: buffer_load_v3i32_tfe:
609; GFX8PLUS:       ; %bb.0:
610; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
611; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
612; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
613; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
614; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
615; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
616; GFX8PLUS-NEXT:    ; return to shader part epilog
617;
618; GFX11-LABEL: buffer_load_v3i32_tfe:
619; GFX11:       ; %bb.0:
620; GFX11-NEXT:    v_mov_b32_e32 v2, 0
621; GFX11-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
622; GFX11-NEXT:    s_waitcnt vmcnt(0)
623; GFX11-NEXT:    global_store_b96 v[0:1], v[2:4], off
624; GFX11-NEXT:    v_mov_b32_e32 v0, v5
625; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
626; GFX11-NEXT:    ; return to shader part epilog
627  %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
628  %data = extractvalue { <3 x i32>, i32 } %load, 0
629  store <3 x i32> %data, ptr addrspace(1) %out
630  %status = extractvalue { <3 x i32>, i32 } %load, 1
631  %fstatus = bitcast i32 %status to float
632  ret float %fstatus
633}
634
; buffer_load_v3f32_tfe: struct-returning variant { <3 x float>, i32 }. The
; vector element is stored to %out and the i32 element (%status in the IR)
; is bitcast to float and returned. The checks expect the same code as the
; v3i32 variant: a tfe xyz load into v[2:5], a store of v[2:4], and v5 -> v0.
635define amdgpu_cs float @buffer_load_v3f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
636; GFX6-LABEL: buffer_load_v3f32_tfe:
637; GFX6:       ; %bb.0:
638; GFX6-NEXT:    v_mov_b32_e32 v2, 0
639; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
640; GFX6-NEXT:    s_mov_b32 s2, 0
641; GFX6-NEXT:    s_mov_b32 s3, 0xf000
642; GFX6-NEXT:    s_mov_b32 s0, s2
643; GFX6-NEXT:    s_mov_b32 s1, s2
644; GFX6-NEXT:    s_waitcnt vmcnt(0)
645; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
646; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
647; GFX6-NEXT:    v_mov_b32_e32 v0, v5
648; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
649; GFX6-NEXT:    ; return to shader part epilog
650;
651; GFX8PLUS-LABEL: buffer_load_v3f32_tfe:
652; GFX8PLUS:       ; %bb.0:
653; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
654; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
655; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
656; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
657; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
658; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
659; GFX8PLUS-NEXT:    ; return to shader part epilog
660;
661; GFX11-LABEL: buffer_load_v3f32_tfe:
662; GFX11:       ; %bb.0:
663; GFX11-NEXT:    v_mov_b32_e32 v2, 0
664; GFX11-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
665; GFX11-NEXT:    s_waitcnt vmcnt(0)
666; GFX11-NEXT:    global_store_b96 v[0:1], v[2:4], off
667; GFX11-NEXT:    v_mov_b32_e32 v0, v5
668; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
669; GFX11-NEXT:    ; return to shader part epilog
670  %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
671  %data = extractvalue { <3 x float>, i32 } %load, 0
672  store <3 x float> %data, ptr addrspace(1) %out
673  %status = extractvalue { <3 x float>, i32 } %load, 1
674  %fstatus = bitcast i32 %status to float
675  ret float %fstatus
676}
677
; buffer_load_v2i32_tfe: struct-returning variant { <2 x i32>, i32 }. The
; vector element is stored to %out and the i32 element (%status in the IR)
; is bitcast to float and returned. The checks expect a two-component tfe
; load (buffer_load_format_xy) writing three registers v[2:4]: v[2:3] feed
; the 64-bit store and v4 is moved to v0 as the result. The load width must
; agree with that register use on every target, including the verde run.
678define amdgpu_cs float @buffer_load_v2i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
679; GFX6-LABEL: buffer_load_v2i32_tfe:
680; GFX6:       ; %bb.0:
681; GFX6-NEXT:    v_mov_b32_e32 v2, 0
682; GFX6-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
683; GFX6-NEXT:    s_mov_b32 s2, 0
684; GFX6-NEXT:    s_mov_b32 s3, 0xf000
685; GFX6-NEXT:    s_mov_b32 s0, s2
686; GFX6-NEXT:    s_mov_b32 s1, s2
687; GFX6-NEXT:    s_waitcnt vmcnt(0)
688; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
689; GFX6-NEXT:    v_mov_b32_e32 v0, v4
690; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
691; GFX6-NEXT:    ; return to shader part epilog
692;
693; GFX8PLUS-LABEL: buffer_load_v2i32_tfe:
694; GFX8PLUS:       ; %bb.0:
695; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
696; GFX8PLUS-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
697; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
698; GFX8PLUS-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
699; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v4
700; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
701; GFX8PLUS-NEXT:    ; return to shader part epilog
702;
703; GFX11-LABEL: buffer_load_v2i32_tfe:
704; GFX11:       ; %bb.0:
705; GFX11-NEXT:    v_mov_b32_e32 v2, 0
706; GFX11-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
707; GFX11-NEXT:    s_waitcnt vmcnt(0)
708; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
709; GFX11-NEXT:    v_mov_b32_e32 v0, v4
710; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
711; GFX11-NEXT:    ; return to shader part epilog
712  %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
713  %data = extractvalue { <2 x i32>, i32 } %load, 0
714  store <2 x i32> %data, ptr addrspace(1) %out
715  %status = extractvalue { <2 x i32>, i32 } %load, 1
716  %fstatus = bitcast i32 %status to float
717  ret float %fstatus
718}
719
; Struct-returning format load of { <2 x float>, i32 } from %rsrc with all four
; i32 operands set to 0. Element 0 is stored to %out; element 1 (%status) is
; bitcast to float and returned. Every expected load carries the tfe modifier.
; NOTE(review): the verde checks expect buffer_load_format_xyz into v[2:5],
; while the tonga and gfx1100 checks expect buffer_load_format_xy into v[2:4];
; all three return v4. The assertions are autogenerated, so confirm this
; against llc output and regenerate rather than editing the checks by hand.
720define amdgpu_cs float @buffer_load_v2f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
721; GFX6-LABEL: buffer_load_v2f32_tfe:
722; GFX6:       ; %bb.0:
723; GFX6-NEXT:    v_mov_b32_e32 v2, 0
724; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
725; GFX6-NEXT:    s_mov_b32 s2, 0
726; GFX6-NEXT:    s_mov_b32 s3, 0xf000
727; GFX6-NEXT:    s_mov_b32 s0, s2
728; GFX6-NEXT:    s_mov_b32 s1, s2
729; GFX6-NEXT:    s_waitcnt vmcnt(0)
730; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
731; GFX6-NEXT:    v_mov_b32_e32 v0, v4
732; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
733; GFX6-NEXT:    ; return to shader part epilog
734;
735; GFX8PLUS-LABEL: buffer_load_v2f32_tfe:
736; GFX8PLUS:       ; %bb.0:
737; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
738; GFX8PLUS-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
739; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
740; GFX8PLUS-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
741; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v4
742; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
743; GFX8PLUS-NEXT:    ; return to shader part epilog
744;
745; GFX11-LABEL: buffer_load_v2f32_tfe:
746; GFX11:       ; %bb.0:
747; GFX11-NEXT:    v_mov_b32_e32 v2, 0
748; GFX11-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
749; GFX11-NEXT:    s_waitcnt vmcnt(0)
750; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
751; GFX11-NEXT:    v_mov_b32_e32 v0, v4
752; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
753; GFX11-NEXT:    ; return to shader part epilog
754  %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ; Member 0 is the loaded <2 x float>; it is written to the global pointer %out.
755  %data = extractvalue { <2 x float>, i32 } %load, 0
756  store <2 x float> %data, ptr addrspace(1) %out
  ; Member 1 is the extra i32; it is reinterpreted as float to match the return type.
757  %status = extractvalue { <2 x float>, i32 } %load, 1
758  %fstatus = bitcast i32 %status to float
759  ret float %fstatus
760}
761
; Struct-returning format load of { i32, i32 } from %rsrc with all four i32
; operands set to 0. Element 0 is stored to %out; element 1 (%status) is
; bitcast to float and returned.
; On all three targets the checks expect buffer_load_format_x with the tfe
; modifier into v[2:3], v2 being stored and v3 copied to v0 as the result.
; Only the store differs: addr64 buffer_store_dword on verde, flat_store_dword
; on tonga, global_store_b32 on gfx1100.
762define amdgpu_cs float @buffer_load_i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
763; GFX6-LABEL: buffer_load_i32_tfe:
764; GFX6:       ; %bb.0:
765; GFX6-NEXT:    v_mov_b32_e32 v2, 0
766; GFX6-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
767; GFX6-NEXT:    s_mov_b32 s2, 0
768; GFX6-NEXT:    s_mov_b32 s3, 0xf000
769; GFX6-NEXT:    s_mov_b32 s0, s2
770; GFX6-NEXT:    s_mov_b32 s1, s2
771; GFX6-NEXT:    s_waitcnt vmcnt(0)
772; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
773; GFX6-NEXT:    v_mov_b32_e32 v0, v3
774; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
775; GFX6-NEXT:    ; return to shader part epilog
776;
777; GFX8PLUS-LABEL: buffer_load_i32_tfe:
778; GFX8PLUS:       ; %bb.0:
779; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
780; GFX8PLUS-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
781; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
782; GFX8PLUS-NEXT:    flat_store_dword v[0:1], v2
783; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v3
784; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
785; GFX8PLUS-NEXT:    ; return to shader part epilog
786;
787; GFX11-LABEL: buffer_load_i32_tfe:
788; GFX11:       ; %bb.0:
789; GFX11-NEXT:    v_mov_b32_e32 v2, 0
790; GFX11-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
791; GFX11-NEXT:    s_waitcnt vmcnt(0)
792; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
793; GFX11-NEXT:    v_mov_b32_e32 v0, v3
794; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
795; GFX11-NEXT:    ; return to shader part epilog
796  %load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ; Member 0 is the loaded i32; it is written to the global pointer %out.
797  %data = extractvalue { i32, i32 } %load, 0
798  store i32 %data, ptr addrspace(1) %out
  ; Member 1 is the extra i32; it is reinterpreted as float to match the return type.
799  %status = extractvalue { i32, i32 } %load, 1
800  %fstatus = bitcast i32 %status to float
801  ret float %fstatus
802}
803
; Struct-returning format load of { float, i32 } from %rsrc with all four i32
; operands set to 0. Element 0 is stored to %out; element 1 (%status) is
; bitcast to float and returned.
; On all three targets the checks expect buffer_load_format_x with the tfe
; modifier into v[2:3], v2 being stored and v3 copied to v0 as the result.
; Only the store differs: addr64 buffer_store_dword on verde, flat_store_dword
; on tonga, global_store_b32 on gfx1100.
804define amdgpu_cs float @buffer_load_f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
805; GFX6-LABEL: buffer_load_f32_tfe:
806; GFX6:       ; %bb.0:
807; GFX6-NEXT:    v_mov_b32_e32 v2, 0
808; GFX6-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
809; GFX6-NEXT:    s_mov_b32 s2, 0
810; GFX6-NEXT:    s_mov_b32 s3, 0xf000
811; GFX6-NEXT:    s_mov_b32 s0, s2
812; GFX6-NEXT:    s_mov_b32 s1, s2
813; GFX6-NEXT:    s_waitcnt vmcnt(0)
814; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
815; GFX6-NEXT:    v_mov_b32_e32 v0, v3
816; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
817; GFX6-NEXT:    ; return to shader part epilog
818;
819; GFX8PLUS-LABEL: buffer_load_f32_tfe:
820; GFX8PLUS:       ; %bb.0:
821; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
822; GFX8PLUS-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
823; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
824; GFX8PLUS-NEXT:    flat_store_dword v[0:1], v2
825; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v3
826; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
827; GFX8PLUS-NEXT:    ; return to shader part epilog
828;
829; GFX11-LABEL: buffer_load_f32_tfe:
830; GFX11:       ; %bb.0:
831; GFX11-NEXT:    v_mov_b32_e32 v2, 0
832; GFX11-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
833; GFX11-NEXT:    s_waitcnt vmcnt(0)
834; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
835; GFX11-NEXT:    v_mov_b32_e32 v0, v3
836; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
837; GFX11-NEXT:    ; return to shader part epilog
838  %load = call { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ; Member 0 is the loaded float; it is written to the global pointer %out.
839  %data = extractvalue { float, i32 } %load, 0
840  store float %data, ptr addrspace(1) %out
  ; Member 1 is the extra i32; it is reinterpreted as float to match the return type.
841  %status = extractvalue { float, i32 } %load, 1
842  %fstatus = bitcast i32 %status to float
843  ret float %fstatus
844}
845
; Declarations of the llvm.amdgcn.struct.ptr.buffer.load.format overloads.
; Each takes a ptr addrspace(8) operand followed by four i32 operands and uses
; attribute group #0.
;
; Plain-result overloads (scalar or vector result only).
; NOTE(review): these four omit `immarg` on the final i32 operand, unlike the
; struct-returning declarations below; confirm whether that is intentional.
846declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #0
847declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
848declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
849declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) #0
; Struct-returning overloads: the result is { data, i32 }, and the final i32
; operand is marked immarg.
850declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
851declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
852declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
853declare { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
854declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
855declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
856declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
857declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
; Attribute group shared by every declaration above.
858attributes #0 = { nounwind readonly }
859