xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-UNPACKED %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-PACKED %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-PACKED %s
7; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-SDAG %s
9; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-PACKED,GFX12-PACKED-GISEL %s
10
; Kernel storing one half value through the scalar (f16) overload of
; llvm.amdgcn.struct.tbuffer.store. %data, %rsrc and %vindex are the first
; three call operands; the remaining i32 operands are the immediates
; 0, 0, 33, 0.
; Every run configuration expects a single d16 "x" tbuffer store with idxen
; whose vector operands are v0 and v1. The mnemonic is spelled
; tbuffer_store_format_d16_x up to gfx1010 and tbuffer_store_d16_format_x on
; gfx1100/gfx1200. The format name printed for immediate 33 and the operand
; printed after the resource (0, or null on gfx1200) also differ per target.
11define amdgpu_kernel void @tbuffer_store_d16_x(<4 x i32> %rsrc, half %data, i32 %vindex) {
12; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
13; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
14; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
15; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
16; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
17; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
18; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s5
19; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
20; PREGFX10-UNPACKED-NEXT:    s_endpgm
21;
22; PREGFX10-PACKED-LABEL: tbuffer_store_d16_x:
23; PREGFX10-PACKED:       ; %bb.0: ; %main_body
24; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
25; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
26; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
27; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
28; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
29; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
30; PREGFX10-PACKED-NEXT:    s_endpgm
31;
32; GFX10-PACKED-LABEL: tbuffer_store_d16_x:
33; GFX10-PACKED:       ; %bb.0: ; %main_body
34; GFX10-PACKED-NEXT:    s_clause 0x1
35; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
36; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
37; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
38; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
39; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
40; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
41; GFX10-PACKED-NEXT:    s_endpgm
42;
43; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
44; GFX11-PACKED:       ; %bb.0: ; %main_body
45; GFX11-PACKED-NEXT:    s_clause 0x1
46; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
47; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
48; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
49; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
50; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
51; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
52; GFX11-PACKED-NEXT:    s_endpgm
53;
54; GFX12-PACKED-LABEL: tbuffer_store_d16_x:
55; GFX12-PACKED:       ; %bb.0: ; %main_body
56; GFX12-PACKED-NEXT:    s_clause 0x1
57; GFX12-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
58; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
59; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
60; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s6
61; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s7
62; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_x v0, v1, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
63; GFX12-PACKED-NEXT:    s_endpgm
64main_body:
65  call void @llvm.amdgcn.struct.tbuffer.store.f16(half %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
66  ret void
67}
68
; Kernel storing a <2 x half> through the v2f16 overload of
; llvm.amdgcn.struct.tbuffer.store, using the same trailing immediates
; (0, 0, 33, 0) as the scalar test.
; On tonga the checks expect the loaded dword to be split with s_lshr_b32 and
; s_and_b32 0xffff into two VGPRs that are stored as v[0:1]. Every other
; target is expected to pass the dword unchanged in a single VGPR (v0).
69define amdgpu_kernel void @tbuffer_store_d16_xy(<4 x i32> %rsrc, <2 x half> %data, i32 %vindex) {
70; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xy:
71; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
72; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
73; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
74; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
75; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s6, s4, 16
76; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
77; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
78; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s6
79; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
80; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xy v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
81; PREGFX10-UNPACKED-NEXT:    s_endpgm
82;
83; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xy:
84; PREGFX10-PACKED:       ; %bb.0: ; %main_body
85; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
86; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
87; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
88; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
89; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
90; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
91; PREGFX10-PACKED-NEXT:    s_endpgm
92;
93; GFX10-PACKED-LABEL: tbuffer_store_d16_xy:
94; GFX10-PACKED:       ; %bb.0: ; %main_body
95; GFX10-PACKED-NEXT:    s_clause 0x1
96; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
97; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
98; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
99; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
100; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
101; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
102; GFX10-PACKED-NEXT:    s_endpgm
103;
104; GFX11-PACKED-LABEL: tbuffer_store_d16_xy:
105; GFX11-PACKED:       ; %bb.0: ; %main_body
106; GFX11-PACKED-NEXT:    s_clause 0x1
107; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
108; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
109; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
110; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
111; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
112; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
113; GFX11-PACKED-NEXT:    s_endpgm
114;
115; GFX12-PACKED-LABEL: tbuffer_store_d16_xy:
116; GFX12-PACKED:       ; %bb.0: ; %main_body
117; GFX12-PACKED-NEXT:    s_clause 0x1
118; GFX12-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
119; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
120; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
121; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s6
122; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s7
123; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, v1, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
124; GFX12-PACKED-NEXT:    s_endpgm
125main_body:
126  call void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
127  ret void
128}
129
; Kernel storing three halves through the v3f16 overload of
; llvm.amdgcn.struct.tbuffer.store. The kernel argument is a <4 x half>; the
; shufflevector in the body keeps elements 0..2 and the result is stored with
; the trailing immediates 0, 0, 33, 0.
; The shuffle's second operand is never selected by the mask <0, 1, 2>, so it
; is written as poison (the replacement for the deprecated undef placeholder).
; Expected code: tonga stores three separately extracted VGPRs (v[0:2]); the
; packed SelectionDAG runs store v[0:1] after clearing the high half of the
; second dword with s_and_b32 0xffff.
; NOTE(review): the gfx1200 GlobalISel expectations differ from the
; SelectionDAG ones. They contain s_pack_lh_b32_b16 s8, s8, s8, no s_and_b32
; mask, and the xyzw mnemonic for this three-component store. They record the
; generated llc output; confirm that the xyzw selection is intended.
130define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
131; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyz:
132; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
133; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
134; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
135; PREGFX10-UNPACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
136; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
137; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
138; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s4, 16
139; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
140; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
141; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s7
142; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
143; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s6
144; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:2], v3, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
145; PREGFX10-UNPACKED-NEXT:    s_endpgm
146;
147; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
148; PREGFX10-PACKED:       ; %bb.0: ; %main_body
149; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
150; PREGFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
151; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
152; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
153; PREGFX10-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
154; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
155; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
156; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
157; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
158; PREGFX10-PACKED-NEXT:    s_endpgm
159;
160; GFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
161; GFX10-PACKED:       ; %bb.0: ; %main_body
162; GFX10-PACKED-NEXT:    s_clause 0x2
163; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
164; GFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
165; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
166; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
167; GFX10-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
168; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
169; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
170; GFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
171; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
172; GFX10-PACKED-NEXT:    s_endpgm
173;
174; GFX11-PACKED-LABEL: tbuffer_store_d16_xyz:
175; GFX11-PACKED:       ; %bb.0: ; %main_body
176; GFX11-PACKED-NEXT:    s_clause 0x2
177; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
178; GFX11-PACKED-NEXT:    s_load_b32 s8, s[4:5], 0x18
179; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
180; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
181; GFX11-PACKED-NEXT:    s_and_b32 s4, s7, 0xffff
182; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
183; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s4
184; GFX11-PACKED-NEXT:    v_mov_b32_e32 v2, s8
185; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
186; GFX11-PACKED-NEXT:    s_endpgm
187;
188; GFX12-PACKED-SDAG-LABEL: tbuffer_store_d16_xyz:
189; GFX12-PACKED-SDAG:       ; %bb.0: ; %main_body
190; GFX12-PACKED-SDAG-NEXT:    s_clause 0x1
191; GFX12-PACKED-SDAG-NEXT:    s_load_b96 s[8:10], s[4:5], 0x10
192; GFX12-PACKED-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
193; GFX12-PACKED-SDAG-NEXT:    s_wait_kmcnt 0x0
194; GFX12-PACKED-SDAG-NEXT:    s_and_b32 s4, s9, 0xffff
195; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v0, s8
196; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v1, s4
197; GFX12-PACKED-SDAG-NEXT:    v_mov_b32_e32 v2, s10
198; GFX12-PACKED-SDAG-NEXT:    tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
199; GFX12-PACKED-SDAG-NEXT:    s_endpgm
200;
201; GFX12-PACKED-GISEL-LABEL: tbuffer_store_d16_xyz:
202; GFX12-PACKED-GISEL:       ; %bb.0: ; %main_body
203; GFX12-PACKED-GISEL-NEXT:    s_clause 0x1
204; GFX12-PACKED-GISEL-NEXT:    s_load_b96 s[8:10], s[4:5], 0x10
205; GFX12-PACKED-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
206; GFX12-PACKED-GISEL-NEXT:    s_wait_kmcnt 0x0
207; GFX12-PACKED-GISEL-NEXT:    s_pack_lh_b32_b16 s8, s8, s8
208; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v2, s10
209; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v0, s8
210; GFX12-PACKED-GISEL-NEXT:    v_mov_b32_e32 v1, s9
211; GFX12-PACKED-GISEL-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
212; GFX12-PACKED-GISEL-NEXT:    s_endpgm
213main_body:
214  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
215  call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
216  ret void
217}
218
; Kernel storing a full <4 x half> through the v4f16 overload of
; llvm.amdgcn.struct.tbuffer.store, using the trailing immediates 0, 0, 33, 0.
; On tonga the checks expect every half to be extracted into its own VGPR
; (s_lshr_b32 / s_and_b32 0xffff) and stored as v[0:3]. Every other target is
; expected to pass the two loaded dwords unchanged and store v[0:1].
; gfx1200 has a single shared set of expectations here, so the SelectionDAG
; and GlobalISel runs are checked against the same output.
219define amdgpu_kernel void @tbuffer_store_d16_xyzw(<4 x i32> %rsrc, <4 x half> %data, i32 %vindex) {
220; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyzw:
221; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
222; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
223; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
224; PREGFX10-UNPACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
225; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
226; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s5, 16
227; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
228; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s8, s4, 16
229; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
230; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
231; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s8
232; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
233; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s7
234; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v4, s6
235; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
236; PREGFX10-UNPACKED-NEXT:    s_endpgm
237;
238; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
239; PREGFX10-PACKED:       ; %bb.0: ; %main_body
240; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
241; PREGFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
242; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
243; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
244; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
245; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
246; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
247; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
248; PREGFX10-PACKED-NEXT:    s_endpgm
249;
250; GFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
251; GFX10-PACKED:       ; %bb.0: ; %main_body
252; GFX10-PACKED-NEXT:    s_clause 0x2
253; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
254; GFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
255; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
256; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
257; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
258; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
259; GFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
260; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
261; GFX10-PACKED-NEXT:    s_endpgm
262;
263; GFX11-PACKED-LABEL: tbuffer_store_d16_xyzw:
264; GFX11-PACKED:       ; %bb.0: ; %main_body
265; GFX11-PACKED-NEXT:    s_clause 0x2
266; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
267; GFX11-PACKED-NEXT:    s_load_b32 s8, s[4:5], 0x18
268; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
269; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
270; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
271; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
272; GFX11-PACKED-NEXT:    v_mov_b32_e32 v2, s8
273; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
274; GFX11-PACKED-NEXT:    s_endpgm
275;
276; GFX12-PACKED-LABEL: tbuffer_store_d16_xyzw:
277; GFX12-PACKED:       ; %bb.0: ; %main_body
278; GFX12-PACKED-NEXT:    s_clause 0x1
279; GFX12-PACKED-NEXT:    s_load_b96 s[8:10], s[4:5], 0x10
280; GFX12-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
281; GFX12-PACKED-NEXT:    s_wait_kmcnt 0x0
282; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, s8
283; GFX12-PACKED-NEXT:    v_mov_b32_e32 v1, s9
284; GFX12-PACKED-NEXT:    v_mov_b32_e32 v2, s10
285; GFX12-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
286; GFX12-PACKED-NEXT:    s_endpgm
287main_body:
288  call void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
289  ret void
290}
291
; Declarations of the llvm.amdgcn.struct.tbuffer.store overloads called by the
; kernels in this file: scalar half plus 2-, 3- and 4-element half vectors.
; Each takes the data value, a <4 x i32>, and five i32 operands.
; NOTE(review): the i32 operands are presumably vindex, voffset, soffset,
; format and auxiliary/cachepolicy - confirm against IntrinsicsAMDGPU.td.
292declare void @llvm.amdgcn.struct.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32)
293declare void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32)
294declare void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32, i32)
295declare void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32)
296