xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=PREGFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=PREGFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefix=GFX11 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefix=GFX12 %s
;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefix=GFX12 %s

; Four raw tbuffer loads from the same resource, all with offset 0 and
; soffset 0. The format operand is 78, 63, 22 and 22 and the last operand is
; 0, 1, 2 and 5; the checks pin how each target prints the format and the
; resulting cache bits (glc/slc/dlc before GFX12, th:TH_LOAD_* on GFX12).
; The returned aggregate is seeded with poison rather than the deprecated
; undef; every field is overwritten, so the expected output is unaffected.
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT]
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] glc
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] slc
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] glc
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x3
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78
; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] glc
; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] slc
; GFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78
; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] glc
; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] slc
; GFX11-NEXT:    tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_clause 0x3
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:78
; GFX12-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] th:TH_LOAD_NT
; GFX12-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], null format:[BUF_FMT_32_FLOAT] th:TH_LOAD_HT
; GFX12-NEXT:    tbuffer_load_format_xyzw v[12:15], off, s[0:3], null format:[BUF_FMT_32_FLOAT] th:TH_LOAD_RT_NT
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
    %vdata     = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 78, i32 0)
    %vdata_glc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 63, i32 1)
    %vdata_slc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 22, i32 2)
    %vdata_f32 = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 22, i32 5)
    %vdata.f     = bitcast <4 x i32> %vdata to <4 x float>
    %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
    %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
    %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata.f, 0
    %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
    %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
    %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
    ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
}

; A single load with constant offset operand 42, soffset 0 and format 78.
; Every checked target encodes the constant as an immediate (offset:42) and
; uses `off` addressing, i.e. no VGPR offset is materialized.
define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_immoffs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offset:42
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_immoffs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78 offset:42
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_immoffs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78 offset:42
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_immoffs:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:78 offset:42
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
    %vdata   = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 42, i32 0, i32 78, i32 0)
    %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
    ret <4 x float> %vdata.f
}

; Constant offset 4092 with format 63. All checked targets keep the whole
; constant in the immediate field (offset:4092) with `off` addressing.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_12bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_12bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_voffset_large_12bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 4092, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant offset 8188 (0x1000 + 4092) with format 63. The verde/tonga,
; gfx1010 and gfx1100 checks expect the constant to be split into a VGPR
; holding 0x1000 plus `offen offset:4092`; the gfx1200 checks expect the whole
; value in the immediate field (offset:8188) with `off` addressing.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_13bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:8188
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8188, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant offset 65532 (0xf000 + 4092) with format 63. The verde/tonga,
; gfx1010 and gfx1100 checks expect a VGPR holding 0xf000 plus
; `offen offset:4092`; the gfx1200 checks expect offset:65532 as an immediate
; with `off` addressing.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0xf000
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_16bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:65532
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 65532, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant offset 8388604 (0x7ff000 + 4092) with format 63. The verde/tonga,
; gfx1010 and gfx1100 checks expect a VGPR holding 0x7ff000 plus
; `offen offset:4092`; the gfx1200 checks expect offset:8388604 as an
; immediate with `off` addressing.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_23bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:8388604
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8388604, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant offset 16777212 with format 63. Here every checked target has to
; split the constant: verde/tonga, gfx1010 and gfx1100 expect a VGPR holding
; 0xfff000 plus `offen offset:4092`, while gfx1200 expects a VGPR holding
; 0x800000 plus `offen offset:8388604`.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0xfff000
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_24bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0x800000
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:8388604
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 16777212, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Three loads that combine an immediate offset with a non-zero soffset:
;   - offset 4095 with constant soffset 61 (format 47),
;   - offset 73 with soffset in the SGPR argument %soffs (format 62),
;   - offset 1 with soffset in the SGPR argument %soffs (format 77).
; The verde/tonga, gfx1010 and gfx1100 checks print the constant soffset
; inline as `61`; the gfx1200 checks expect it to be moved into s5 first.
; All three calls pass 0 as the last operand, so the locals are named after
; the soffset source rather than after cache bits. The aggregate is seeded
; with poison rather than the deprecated undef; every field is overwritten,
; so the expected output is unaffected.
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
; PREGFX10-LABEL: tbuffer_load_immoffs_large:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_immoffs_large:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_FMT_10_10_10_2_SSCALED] offset:4095
; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_UINT] offset:73
; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_FLOAT] offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_immoffs_large:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_FMT_8_8_8_8_SINT] offset:4095
; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] offset:73
; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:77 offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_immoffs_large:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s5, 61
; GFX12-NEXT:    s_clause 0x2
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], s5 format:[BUF_FMT_8_8_8_8_SINT] offset:4095
; GFX12-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] offset:73
; GFX12-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:77 offset:1
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
    %vdata_imm    = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 4095, i32 61, i32 47, i32 0)
    %vdata_sgpr73 = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 73, i32 %soffs, i32 62, i32 0)
    %vdata_sgpr1  = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 1, i32 %soffs, i32 77, i32 0)
    %vdata_imm.f    = bitcast <4 x i32> %vdata_imm to <4 x float>
    %vdata_sgpr73.f = bitcast <4 x i32> %vdata_sgpr73 to <4 x float>
    %vdata_sgpr1.f  = bitcast <4 x i32> %vdata_sgpr1 to <4 x float>
    %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata_imm.f, 0
    %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_sgpr73.f, 1
    %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_sgpr1.f, 2
    ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; The offset operand is the non-constant VGPR argument %voffs (format 78).
; Every checked target passes it as v0 with `offen` and no immediate offset.
define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_ofs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_ofs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_ofs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_ofs:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 offen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
    %vdata   = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 %voffs, i32 0, i32 78, i32 0)
    %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
    ret <4 x float> %vdata.f
}

; The offset operand is %voffs + 52 (format 78). Every checked target expects
; the add to disappear: the VGPR is passed as v0 with `offen` and the
; constant 52 is folded into the immediate field (offset:52).
define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_ofs_imm:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offen offset:52
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_ofs_imm:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen offset:52
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_ofs_imm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen offset:52
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_ofs_imm:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 offen offset:52
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
    %ofs = add i32 %voffs, 52
    %vdata   = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 %ofs, i32 0, i32 78, i32 0)
    %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
    ret <4 x float> %vdata.f
}

; Two-element result: the v2i32 overload with format 77 and zero offsets is
; expected to select tbuffer_load_format_xy writing v[0:1] on every checked
; target.
define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_xy:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT]
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_xy:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:77
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_xy:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    tbuffer_load_format_xy v[0:1], off, s[0:3], null format:77
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
    %vdata = call <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 77, i32 0)
    %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
    ret <2 x float> %vdata.f
}

; Scalar result: the i32 overload with format 77 and zero offsets is expected
; to select tbuffer_load_format_x writing v0 on every checked target.
define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_x:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT]
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    tbuffer_load_format_x v0, off, s[0:3], 0 format:77
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_x:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    tbuffer_load_format_x v0, off, s[0:3], null format:77
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
    %vdata = call i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 77, i32 0)
    %vdata.f = bitcast i32 %vdata to float
    ret float %vdata.f
}

; Declarations of the raw tbuffer load overloads called above (i32, v2i32,
; v4i32 and v4f32 results). Each takes the <4 x i32> resource followed by
; four i32 operands; at the call sites these are, in order, the offset, the
; soffset, the format and the operand that selects the cache bits.
declare i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32)
declare <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32)
declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)
