xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10 %s
3;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10 %s
4;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10 %s
5;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GFX11 %s
6;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
7;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
8
; Issues four tbuffer loads from the same inreg descriptor with constant-zero
; index and offsets. Only the format operand (78, 63, 22, 22) and the trailing
; auxiliary operand (0, 1, 2, 5) vary between the calls. The expected output
; below shows how each target prints them -- glc/slc/dlc modifiers before GFX12
; and th:TH_LOAD_* on GFX12.
;
; The result aggregate is seeded with poison instead of the deprecated undef.
; Every field is overwritten by the insertvalue chain, so the seed value never
; reaches instruction selection.
9define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
10; PREGFX10-LABEL: tbuffer_load:
11; PREGFX10:       ; %bb.0: ; %main_body
12; PREGFX10-NEXT:    v_mov_b32_e32 v12, 0
13; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen
14; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen glc
15; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen slc
16; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen glc
17; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
18; PREGFX10-NEXT:    ; return to shader part epilog
19;
20; GFX10-LABEL: tbuffer_load:
21; GFX10:       ; %bb.0: ; %main_body
22; GFX10-NEXT:    v_mov_b32_e32 v16, 0
23; GFX10-NEXT:    s_clause 0x3
24; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v16, s[0:3], 0 format:78 idxen
25; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v16, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen glc
26; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v16, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen slc
27; GFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], v16, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen glc dlc
28; GFX10-NEXT:    s_waitcnt vmcnt(0)
29; GFX10-NEXT:    ; return to shader part epilog
30;
31; GFX11-LABEL: tbuffer_load:
32; GFX11:       ; %bb.0: ; %main_body
33; GFX11-NEXT:    v_mov_b32_e32 v12, 0
34; GFX11-NEXT:    s_clause 0x3
35; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 0 format:78 idxen
36; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen glc
37; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen slc
38; GFX11-NEXT:    tbuffer_load_format_xyzw v[12:15], v12, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen glc dlc
39; GFX11-NEXT:    s_waitcnt vmcnt(0)
40; GFX11-NEXT:    ; return to shader part epilog
41;
42; GFX12-LABEL: tbuffer_load:
43; GFX12:       ; %bb.0: ; %main_body
44; GFX12-NEXT:    v_mov_b32_e32 v12, 0
45; GFX12-NEXT:    s_clause 0x3
46; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], null format:78 idxen
47; GFX12-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen th:TH_LOAD_NT
48; GFX12-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen th:TH_LOAD_HT
49; GFX12-NEXT:    tbuffer_load_format_xyzw v[12:15], v12, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen th:TH_LOAD_RT_NT
50; GFX12-NEXT:    s_wait_loadcnt 0x0
51; GFX12-NEXT:    ; return to shader part epilog
52main_body:
53    %vdata     = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 0)
54    %vdata_glc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 63, i32 1)
55    %vdata_slc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 2)
56    %vdata_f32 = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 5)
57    %vdata.f     = bitcast <4 x i32> %vdata to <4 x float>
58    %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
59    %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
60    %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata.f, 0
61    %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
62    %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
63    %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
64    ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
65}
66
; Single load with a small constant offset (42) and zero index/soffset.
; The expected output on every target folds the constant into the instruction's
; immediate offset field (offset:42); v0 is zeroed only to feed idxen.
67define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
68; PREGFX10-LABEL: tbuffer_load_immoffs:
69; PREGFX10:       ; %bb.0: ; %main_body
70; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
71; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:42
72; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
73; PREGFX10-NEXT:    ; return to shader part epilog
74;
75; GFX10-LABEL: tbuffer_load_immoffs:
76; GFX10:       ; %bb.0: ; %main_body
77; GFX10-NEXT:    v_mov_b32_e32 v0, 0
78; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen offset:42
79; GFX10-NEXT:    s_waitcnt vmcnt(0)
80; GFX10-NEXT:    ; return to shader part epilog
81;
82; GFX11-LABEL: tbuffer_load_immoffs:
83; GFX11:       ; %bb.0: ; %main_body
84; GFX11-NEXT:    v_mov_b32_e32 v0, 0
85; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen offset:42
86; GFX11-NEXT:    s_waitcnt vmcnt(0)
87; GFX11-NEXT:    ; return to shader part epilog
88;
89; GFX12-LABEL: tbuffer_load_immoffs:
90; GFX12:       ; %bb.0: ; %main_body
91; GFX12-NEXT:    v_mov_b32_e32 v0, 0
92; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 idxen offset:42
93; GFX12-NEXT:    s_wait_loadcnt 0x0
94; GFX12-NEXT:    ; return to shader part epilog
95main_body:
96    %vdata   = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 42, i32 0, i32 78, i32 0)
97    %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
98    ret <4 x float> %vdata.f
99}
100
; Three loads combining a constant offset with a non-zero soffset operand:
;   - offset 4095 with constant soffset 61 (format 47)
;   - offset 73 with soffset taken from the inreg argument %soffs (format 62)
;   - offset 1 with soffset %soffs (format 77)
; All three pass 0 as the trailing auxiliary operand, so the locals are named
; after their format code rather than after a cache policy. The expected
; output shows the constant soffset 61 encoded inline before GFX12, whereas
; GFX12 first moves it into s5. The result aggregate is seeded with poison;
; every field is overwritten before the return.
101define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
102; PREGFX10-LABEL: tbuffer_load_immoffs_large:
103; PREGFX10:       ; %bb.0:
104; PREGFX10-NEXT:    v_mov_b32_e32 v8, 0
105; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v8, s[0:3], 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
106; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] idxen offset:73
107; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:1
108; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
109; PREGFX10-NEXT:    ; return to shader part epilog
110;
111; GFX10-LABEL: tbuffer_load_immoffs_large:
112; GFX10:       ; %bb.0:
113; GFX10-NEXT:    v_mov_b32_e32 v12, 0
114; GFX10-NEXT:    s_clause 0x2
115; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 61 format:[BUF_FMT_10_10_10_2_SSCALED] idxen offset:4095
116; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], s4 format:[BUF_FMT_32_32_UINT] idxen offset:73
117; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], s4 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:1
118; GFX10-NEXT:    s_waitcnt vmcnt(0)
119; GFX10-NEXT:    ; return to shader part epilog
120;
121; GFX11-LABEL: tbuffer_load_immoffs_large:
122; GFX11:       ; %bb.0:
123; GFX11-NEXT:    v_mov_b32_e32 v8, 0
124; GFX11-NEXT:    s_clause 0x2
125; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v8, s[0:3], 61 format:[BUF_FMT_8_8_8_8_SINT] idxen offset:4095
126; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] idxen offset:73
127; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:77 idxen offset:1
128; GFX11-NEXT:    s_waitcnt vmcnt(0)
129; GFX11-NEXT:    ; return to shader part epilog
130;
131; GFX12-LABEL: tbuffer_load_immoffs_large:
132; GFX12:       ; %bb.0:
133; GFX12-NEXT:    v_mov_b32_e32 v8, 0
134; GFX12-NEXT:    s_mov_b32 s5, 61
135; GFX12-NEXT:    s_clause 0x2
136; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v8, s[0:3], s5 format:[BUF_FMT_8_8_8_8_SINT] idxen offset:4095
137; GFX12-NEXT:    tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] idxen offset:73
138; GFX12-NEXT:    tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:77 idxen offset:1
139; GFX12-NEXT:    s_wait_loadcnt 0x0
140; GFX12-NEXT:    ; return to shader part epilog
141    %vdata_fmt47 = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 4095, i32 61, i32 47, i32 0)
142    %vdata_fmt62 = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 73, i32 %soffs, i32 62, i32 0)
143    %vdata_fmt77 = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 1, i32 %soffs, i32 77, i32 0)
144    %vdata_fmt47.f = bitcast <4 x i32> %vdata_fmt47 to <4 x float>
145    %vdata_fmt62.f = bitcast <4 x i32> %vdata_fmt62 to <4 x float>
146    %vdata_fmt77.f = bitcast <4 x i32> %vdata_fmt77 to <4 x float>
147    %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata_fmt47.f, 0
148    %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_fmt62.f, 1
149    %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_fmt77.f, 2
150    ret {<4 x float>, <4 x float>, <4 x float>} %r2
151}
152
; Load indexed by the non-uniform argument %vindex with zero offsets.
; The expected output uses the incoming v0 directly as the idxen operand, with
; no extra moves on any target.
153define amdgpu_vs <4 x float> @tbuffer_load_idx(<4 x i32> inreg, i32 %vindex) {
154; PREGFX10-LABEL: tbuffer_load_idx:
155; PREGFX10:       ; %bb.0: ; %main_body
156; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen
157; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
158; PREGFX10-NEXT:    ; return to shader part epilog
159;
160; GFX10-LABEL: tbuffer_load_idx:
161; GFX10:       ; %bb.0: ; %main_body
162; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen
163; GFX10-NEXT:    s_waitcnt vmcnt(0)
164; GFX10-NEXT:    ; return to shader part epilog
165;
166; GFX11-LABEL: tbuffer_load_idx:
167; GFX11:       ; %bb.0: ; %main_body
168; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen
169; GFX11-NEXT:    s_waitcnt vmcnt(0)
170; GFX11-NEXT:    ; return to shader part epilog
171;
172; GFX12-LABEL: tbuffer_load_idx:
173; GFX12:       ; %bb.0: ; %main_body
174; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 idxen
175; GFX12-NEXT:    s_wait_loadcnt 0x0
176; GFX12-NEXT:    ; return to shader part epilog
177main_body:
178    %vdata   = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 78, i32 0)
179    %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
180    ret <4 x float> %vdata.f
181}
182
; Load with a non-uniform offset %voffs and a constant-zero index.
; The expected output still uses idxen together with offen, so a two-register
; address v[0:1] is built: the zero index goes in v0 and %voffs is copied to v1.
; Targets before GFX12 route the zero through s4; GFX12 moves it in directly.
183define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
184; PREGFX10-LABEL: tbuffer_load_ofs:
185; PREGFX10:       ; %bb.0: ; %main_body
186; PREGFX10-NEXT:    s_mov_b32 s4, 0
187; PREGFX10-NEXT:    v_mov_b32_e32 v1, v0
188; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
189; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen
190; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
191; PREGFX10-NEXT:    ; return to shader part epilog
192;
193; GFX10-LABEL: tbuffer_load_ofs:
194; GFX10:       ; %bb.0: ; %main_body
195; GFX10-NEXT:    s_mov_b32 s4, 0
196; GFX10-NEXT:    v_mov_b32_e32 v1, v0
197; GFX10-NEXT:    v_mov_b32_e32 v0, s4
198; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
199; GFX10-NEXT:    s_waitcnt vmcnt(0)
200; GFX10-NEXT:    ; return to shader part epilog
201;
202; GFX11-LABEL: tbuffer_load_ofs:
203; GFX11:       ; %bb.0: ; %main_body
204; GFX11-NEXT:    s_mov_b32 s4, 0
205; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
206; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
207; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
208; GFX11-NEXT:    s_waitcnt vmcnt(0)
209; GFX11-NEXT:    ; return to shader part epilog
210;
211; GFX12-LABEL: tbuffer_load_ofs:
212; GFX12:       ; %bb.0: ; %main_body
213; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
214; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen
215; GFX12-NEXT:    s_wait_loadcnt 0x0
216; GFX12-NEXT:    ; return to shader part epilog
217main_body:
218    %vdata   = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 78, i32 0)
219    %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
220    ret <4 x float> %vdata.f
221}
222
; Same shape as tbuffer_load_ofs, but the offset is %voffs + 52.
; The expected output folds the constant 52 into the immediate offset field
; (offset:52) and passes the unmodified %voffs in v1; no add instruction is
; expected on any target.
223define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
224; PREGFX10-LABEL: tbuffer_load_ofs_imm:
225; PREGFX10:       ; %bb.0: ; %main_body
226; PREGFX10-NEXT:    s_mov_b32 s4, 0
227; PREGFX10-NEXT:    v_mov_b32_e32 v1, v0
228; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
229; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen offset:52
230; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
231; PREGFX10-NEXT:    ; return to shader part epilog
232;
233; GFX10-LABEL: tbuffer_load_ofs_imm:
234; GFX10:       ; %bb.0: ; %main_body
235; GFX10-NEXT:    s_mov_b32 s4, 0
236; GFX10-NEXT:    v_mov_b32_e32 v1, v0
237; GFX10-NEXT:    v_mov_b32_e32 v0, s4
238; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen offset:52
239; GFX10-NEXT:    s_waitcnt vmcnt(0)
240; GFX10-NEXT:    ; return to shader part epilog
241;
242; GFX11-LABEL: tbuffer_load_ofs_imm:
243; GFX11:       ; %bb.0: ; %main_body
244; GFX11-NEXT:    s_mov_b32 s4, 0
245; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
246; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
247; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen offset:52
248; GFX11-NEXT:    s_waitcnt vmcnt(0)
249; GFX11-NEXT:    ; return to shader part epilog
250;
251; GFX12-LABEL: tbuffer_load_ofs_imm:
252; GFX12:       ; %bb.0: ; %main_body
253; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
254; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen offset:52
255; GFX12-NEXT:    s_wait_loadcnt 0x0
256; GFX12-NEXT:    ; return to shader part epilog
257main_body:
258    %ofs = add i32 %voffs, 52
259    %vdata   = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %ofs, i32 0, i32 78, i32 0)
260    %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
261    ret <4 x float> %vdata.f
262}
263
; Load with both a non-uniform index (%vindex) and a non-uniform offset
; (%voffs). The two arguments already arrive in v0 and v1, so the expected
; output uses v[0:1] with idxen offen and needs no setup instructions.
264define amdgpu_vs <4 x float> @tbuffer_load_both(<4 x i32> inreg, i32 %vindex, i32 %voffs) {
265; PREGFX10-LABEL: tbuffer_load_both:
266; PREGFX10:       ; %bb.0: ; %main_body
267; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen
268; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
269; PREGFX10-NEXT:    ; return to shader part epilog
270;
271; GFX10-LABEL: tbuffer_load_both:
272; GFX10:       ; %bb.0: ; %main_body
273; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
274; GFX10-NEXT:    s_waitcnt vmcnt(0)
275; GFX10-NEXT:    ; return to shader part epilog
276;
277; GFX11-LABEL: tbuffer_load_both:
278; GFX11:       ; %bb.0: ; %main_body
279; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
280; GFX11-NEXT:    s_waitcnt vmcnt(0)
281; GFX11-NEXT:    ; return to shader part epilog
282;
283; GFX12-LABEL: tbuffer_load_both:
284; GFX12:       ; %bb.0: ; %main_body
285; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen
286; GFX12-NEXT:    s_wait_loadcnt 0x0
287; GFX12-NEXT:    ; return to shader part epilog
288main_body:
289    %vdata   = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 %voffs, i32 0, i32 78, i32 0)
290    %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
291    ret <4 x float> %vdata.f
292}
293
; Two-component result: the v2i32 overload of the tbuffer intrinsic is expected
; to select tbuffer_load_format_xy with a two-register destination v[0:1].
; Despite the function name, this exercises the typed (tbuffer) load.
294define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
295; PREGFX10-LABEL: buffer_load_xy:
296; PREGFX10:       ; %bb.0:
297; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
298; PREGFX10-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen
299; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
300; PREGFX10-NEXT:    ; return to shader part epilog
301;
302; GFX10-LABEL: buffer_load_xy:
303; GFX10:       ; %bb.0:
304; GFX10-NEXT:    v_mov_b32_e32 v0, 0
305; GFX10-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen
306; GFX10-NEXT:    s_waitcnt vmcnt(0)
307; GFX10-NEXT:    ; return to shader part epilog
308;
309; GFX11-LABEL: buffer_load_xy:
310; GFX11:       ; %bb.0:
311; GFX11-NEXT:    v_mov_b32_e32 v0, 0
312; GFX11-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:77 idxen
313; GFX11-NEXT:    s_waitcnt vmcnt(0)
314; GFX11-NEXT:    ; return to shader part epilog
315;
316; GFX12-LABEL: buffer_load_xy:
317; GFX12:       ; %bb.0:
318; GFX12-NEXT:    v_mov_b32_e32 v0, 0
319; GFX12-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], null format:77 idxen
320; GFX12-NEXT:    s_wait_loadcnt 0x0
321; GFX12-NEXT:    ; return to shader part epilog
322    %vdata = call <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
323    %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
324    ret <2 x float> %vdata.f
325}
326
; Scalar result: the i32 overload of the tbuffer intrinsic is expected to
; select tbuffer_load_format_x with a single destination register.
; Despite the function name, this exercises the typed (tbuffer) load.
327define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
328; PREGFX10-LABEL: buffer_load_x:
329; PREGFX10:       ; %bb.0:
330; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
331; PREGFX10-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen
332; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
333; PREGFX10-NEXT:    ; return to shader part epilog
334;
335; GFX10-LABEL: buffer_load_x:
336; GFX10:       ; %bb.0:
337; GFX10-NEXT:    v_mov_b32_e32 v0, 0
338; GFX10-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen
339; GFX10-NEXT:    s_waitcnt vmcnt(0)
340; GFX10-NEXT:    ; return to shader part epilog
341;
342; GFX11-LABEL: buffer_load_x:
343; GFX11:       ; %bb.0:
344; GFX11-NEXT:    v_mov_b32_e32 v0, 0
345; GFX11-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:77 idxen
346; GFX11-NEXT:    s_waitcnt vmcnt(0)
347; GFX11-NEXT:    ; return to shader part epilog
348;
349; GFX12-LABEL: buffer_load_x:
350; GFX12:       ; %bb.0:
351; GFX12-NEXT:    v_mov_b32_e32 v0, 0
352; GFX12-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], null format:77 idxen
353; GFX12-NEXT:    s_wait_loadcnt 0x0
354; GFX12-NEXT:    ; return to shader part epilog
355    %vdata = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
356    %vdata.f = bitcast i32 %vdata to float
357    ret float %vdata.f
358}
359
; First of a series probing how large constant offsets are split.
; Constant offset 4092: the expected output on every target carries the whole
; value in the immediate field (offset:4092) with no offen register.
; Despite the function name, this exercises the typed (tbuffer) load.
360define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
361; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
362; PREGFX10:       ; %bb.0: ; %main_body
363; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
364; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
365; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
366; PREGFX10-NEXT:    ; return to shader part epilog
367;
368; GFX10-LABEL: buffer_load_voffset_large_12bit:
369; GFX10:       ; %bb.0: ; %main_body
370; GFX10-NEXT:    v_mov_b32_e32 v0, 0
371; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offset:4092
372; GFX10-NEXT:    s_waitcnt vmcnt(0)
373; GFX10-NEXT:    ; return to shader part epilog
374;
375; GFX11-LABEL: buffer_load_voffset_large_12bit:
376; GFX11:       ; %bb.0: ; %main_body
377; GFX11-NEXT:    v_mov_b32_e32 v0, 0
378; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
379; GFX11-NEXT:    s_waitcnt vmcnt(0)
380; GFX11-NEXT:    ; return to shader part epilog
381;
382; GFX12-LABEL: buffer_load_voffset_large_12bit:
383; GFX12:       ; %bb.0: ; %main_body
384; GFX12-NEXT:    v_mov_b32_e32 v0, 0
385; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
386; GFX12-NEXT:    s_wait_loadcnt 0x0
387; GFX12-NEXT:    ; return to shader part epilog
388main_body:
389  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 63, i32 0)
390  ret <4 x float> %data
391}
392
; Constant offset 8188 (0x1000 + 4092). Before GFX12 the expected output
; splits it: 0x1000 goes into v1 as an offen register and 4092 stays in the
; immediate field. On GFX12 the whole value is expected in the immediate
; (offset:8188) with no offen register.
393define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
394; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
395; PREGFX10:       ; %bb.0: ; %main_body
396; PREGFX10-NEXT:    s_mov_b32 s4, 0
397; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0x1000
398; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
399; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
400; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
401; PREGFX10-NEXT:    ; return to shader part epilog
402;
403; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
404; GFX10:       ; %bb.0: ; %main_body
405; GFX10-NEXT:    s_mov_b32 s4, 0
406; GFX10-NEXT:    v_mov_b32_e32 v1, 0x1000
407; GFX10-NEXT:    v_mov_b32_e32 v0, s4
408; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
409; GFX10-NEXT:    s_waitcnt vmcnt(0)
410; GFX10-NEXT:    ; return to shader part epilog
411;
412; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
413; GFX11:       ; %bb.0: ; %main_body
414; GFX11-NEXT:    s_mov_b32 s4, 0
415; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
416; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
417; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
418; GFX11-NEXT:    s_waitcnt vmcnt(0)
419; GFX11-NEXT:    ; return to shader part epilog
420;
421; GFX12-LABEL: tbuffer_load_voffset_large_13bit:
422; GFX12:       ; %bb.0: ; %main_body
423; GFX12-NEXT:    v_mov_b32_e32 v0, 0
424; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:8188
425; GFX12-NEXT:    s_wait_loadcnt 0x0
426; GFX12-NEXT:    ; return to shader part epilog
427main_body:
428  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 63, i32 0)
429  ret <4 x float> %data
430}
431
; Constant offset 65532 (0xf000 + 4092). Before GFX12 the expected output
; puts 0xf000 in v1 as an offen register and keeps 4092 in the immediate
; field. On GFX12 the whole value is expected in the immediate (offset:65532).
432define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
433; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
434; PREGFX10:       ; %bb.0: ; %main_body
435; PREGFX10-NEXT:    s_mov_b32 s4, 0
436; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0xf000
437; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
438; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
439; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
440; PREGFX10-NEXT:    ; return to shader part epilog
441;
442; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
443; GFX10:       ; %bb.0: ; %main_body
444; GFX10-NEXT:    s_mov_b32 s4, 0
445; GFX10-NEXT:    v_mov_b32_e32 v1, 0xf000
446; GFX10-NEXT:    v_mov_b32_e32 v0, s4
447; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
448; GFX10-NEXT:    s_waitcnt vmcnt(0)
449; GFX10-NEXT:    ; return to shader part epilog
450;
451; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
452; GFX11:       ; %bb.0: ; %main_body
453; GFX11-NEXT:    s_mov_b32 s4, 0
454; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
455; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
456; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
457; GFX11-NEXT:    s_waitcnt vmcnt(0)
458; GFX11-NEXT:    ; return to shader part epilog
459;
460; GFX12-LABEL: tbuffer_load_voffset_large_16bit:
461; GFX12:       ; %bb.0: ; %main_body
462; GFX12-NEXT:    v_mov_b32_e32 v0, 0
463; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:65532
464; GFX12-NEXT:    s_wait_loadcnt 0x0
465; GFX12-NEXT:    ; return to shader part epilog
466main_body:
467  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 63, i32 0)
468  ret <4 x float> %data
469}
470
; Constant offset 8388604 (0x7ff000 + 4092). Before GFX12 the expected output
; puts 0x7ff000 in v1 as an offen register and keeps 4092 in the immediate
; field. On GFX12 the whole value is expected in the immediate
; (offset:8388604); the 24bit test that follows is the first one where GFX12
; also needs an offen register.
471define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
472; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
473; PREGFX10:       ; %bb.0: ; %main_body
474; PREGFX10-NEXT:    s_mov_b32 s4, 0
475; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0x7ff000
476; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
477; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
478; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
479; PREGFX10-NEXT:    ; return to shader part epilog
480;
481; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
482; GFX10:       ; %bb.0: ; %main_body
483; GFX10-NEXT:    s_mov_b32 s4, 0
484; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7ff000
485; GFX10-NEXT:    v_mov_b32_e32 v0, s4
486; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
487; GFX10-NEXT:    s_waitcnt vmcnt(0)
488; GFX10-NEXT:    ; return to shader part epilog
489;
490; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
491; GFX11:       ; %bb.0: ; %main_body
492; GFX11-NEXT:    s_mov_b32 s4, 0
493; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
494; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
495; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
496; GFX11-NEXT:    s_waitcnt vmcnt(0)
497; GFX11-NEXT:    ; return to shader part epilog
498;
499; GFX12-LABEL: tbuffer_load_voffset_large_23bit:
500; GFX12:       ; %bb.0: ; %main_body
501; GFX12-NEXT:    v_mov_b32_e32 v0, 0
502; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:8388604
503; GFX12-NEXT:    s_wait_loadcnt 0x0
504; GFX12-NEXT:    ; return to shader part epilog
505main_body:
506  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 63, i32 0)
507  ret <4 x float> %data
508}
509
; Constant offset 16777212 (0xfff000 + 4092). Before GFX12 the expected output
; splits it as 0xfff000 in v1 plus an immediate of 4092. On GFX12 the split is
; 0x800000 in v1 plus an immediate of 8388604, so an offen register is needed
; there too. The SelectionDAG and GlobalISel runs for GFX12 differ only in the
; operand order of the v_dual_mov_b32 pair, hence the separate -SDAG / -GISEL
; assertion groups for this function.
510define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
511; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
512; PREGFX10:       ; %bb.0: ; %main_body
513; PREGFX10-NEXT:    s_mov_b32 s4, 0
514; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0xfff000
515; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
516; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
517; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
518; PREGFX10-NEXT:    ; return to shader part epilog
519;
520; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
521; GFX10:       ; %bb.0: ; %main_body
522; GFX10-NEXT:    s_mov_b32 s4, 0
523; GFX10-NEXT:    v_mov_b32_e32 v1, 0xfff000
524; GFX10-NEXT:    v_mov_b32_e32 v0, s4
525; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
526; GFX10-NEXT:    s_waitcnt vmcnt(0)
527; GFX10-NEXT:    ; return to shader part epilog
528;
529; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
530; GFX11:       ; %bb.0: ; %main_body
531; GFX11-NEXT:    s_mov_b32 s4, 0
532; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
533; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
534; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
535; GFX11-NEXT:    s_waitcnt vmcnt(0)
536; GFX11-NEXT:    ; return to shader part epilog
537;
538; GFX12-SDAG-LABEL: tbuffer_load_voffset_large_24bit:
539; GFX12-SDAG:       ; %bb.0: ; %main_body
540; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 0x800000 :: v_dual_mov_b32 v0, 0
541; GFX12-SDAG-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:8388604
542; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
543; GFX12-SDAG-NEXT:    ; return to shader part epilog
544;
545; GFX12-GISEL-LABEL: tbuffer_load_voffset_large_24bit:
546; GFX12-GISEL:       ; %bb.0: ; %main_body
547; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x800000
548; GFX12-GISEL-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:8388604
549; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
550; GFX12-GISEL-NEXT:    ; return to shader part epilog
551main_body:
552  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 63, i32 0)
553  ret <4 x float> %data
554}
555
; Declarations of the intrinsic overloads called in this file, one per result
; type (i32, v2i32, v4i32, v4f32). Judging by the value names at the call
; sites (%rsrc, %vindex, %voffs, %soffs) and the printed instructions, the
; operands are: resource descriptor, vindex, voffset, soffset, format, and a
; trailing auxiliary/cache-policy immediate -- confirm against the AMDGPU
; intrinsic definitions before relying on this.
556declare i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32)
557declare <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32)
558declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32)
559declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32)
560