xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-UNPACKED %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=PREGFX10-PACKED %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-PACKED %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-PACKED %s
7
; Kernel that stores a single half through the struct.ptr tbuffer store
; intrinsic, with 33 as the only non-zero immediate operand. The autogenerated
; checks below expect one idxen D16 "x" typed-buffer store on every target.
; The format operand prints as BUF_NUM_FORMAT_USCALED on tonga/gfx810/gfx900,
; as BUF_FMT_10_11_11_SSCALED on gfx1010 and as BUF_FMT_10_10_10_2_SNORM on
; gfx1100, and gfx1100 spells the mnemonic tbuffer_store_d16_format_x.
; NOTE(review): the four trailing i32 operands are presumably offset, soffset,
; format and aux, in that order - confirm against the intrinsic definition.
8define amdgpu_kernel void @tbuffer_store_d16_x(ptr addrspace(8) %rsrc, half %data, i32 %vindex) {
9; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_x:
10; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
11; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
12; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
13; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
14; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
15; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s5
16; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
17; PREGFX10-UNPACKED-NEXT:    s_endpgm
18;
19; PREGFX10-PACKED-LABEL: tbuffer_store_d16_x:
20; PREGFX10-PACKED:       ; %bb.0: ; %main_body
21; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
22; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
23; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
24; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
25; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
26; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
27; PREGFX10-PACKED-NEXT:    s_endpgm
28;
29; GFX10-PACKED-LABEL: tbuffer_store_d16_x:
30; GFX10-PACKED:       ; %bb.0: ; %main_body
31; GFX10-PACKED-NEXT:    s_clause 0x1
32; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
33; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
34; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
35; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
36; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
37; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
38; GFX10-PACKED-NEXT:    s_endpgm
39;
40; GFX11-PACKED-LABEL: tbuffer_store_d16_x:
41; GFX11-PACKED:       ; %bb.0: ; %main_body
42; GFX11-PACKED-NEXT:    s_clause 0x1
43; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
44; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
45; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
46; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
47; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
48; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_x v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
49; GFX11-PACKED-NEXT:    s_endpgm
50main_body:
51  call void @llvm.amdgcn.struct.ptr.tbuffer.store.f16(half %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
52  ret void
53}
54
; Kernel that stores a <2 x half> payload through the struct.ptr tbuffer store
; intrinsic. The tonga checks split the two halves into separate VGPRs
; (s_lshr_b32 by 16 and s_and_b32 with 0xffff) and store from v[0:1]; the
; gfx810/gfx900/gfx1010/gfx1100 checks pass the loaded dword through unchanged
; and store from v0 alone.
55define amdgpu_kernel void @tbuffer_store_d16_xy(ptr addrspace(8) %rsrc, <2 x half> %data, i32 %vindex) {
56; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xy:
57; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
58; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
59; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
60; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
61; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s6, s4, 16
62; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
63; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
64; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s6
65; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
66; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xy v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
67; PREGFX10-UNPACKED-NEXT:    s_endpgm
68;
69; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xy:
70; PREGFX10-PACKED:       ; %bb.0: ; %main_body
71; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
72; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
73; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
74; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
75; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
76; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, v1, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
77; PREGFX10-PACKED-NEXT:    s_endpgm
78;
79; GFX10-PACKED-LABEL: tbuffer_store_d16_xy:
80; GFX10-PACKED:       ; %bb.0: ; %main_body
81; GFX10-PACKED-NEXT:    s_clause 0x1
82; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
83; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
84; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
85; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
86; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
87; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
88; GFX10-PACKED-NEXT:    s_endpgm
89;
90; GFX11-PACKED-LABEL: tbuffer_store_d16_xy:
91; GFX11-PACKED:       ; %bb.0: ; %main_body
92; GFX11-PACKED-NEXT:    s_clause 0x1
93; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
94; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
95; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
96; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
97; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
98; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xy v0, v1, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
99; GFX11-PACKED-NEXT:    s_endpgm
100main_body:
101  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f16(<2 x half> %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
102  ret void
103}
104
; Kernel that stores the first three lanes of a <4 x half> argument as a
; <3 x half> payload through the struct.ptr tbuffer store intrinsic. The tonga
; checks place each half in its own VGPR and store from v[0:2]; the
; gfx810/gfx900/gfx1010/gfx1100 checks store from v[0:1] after masking the
; second dword with 0xffff.
105define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %vindex) {
106; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyz:
107; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
108; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
109; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
110; PREGFX10-UNPACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
111; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
112; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
113; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s4, 16
114; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
115; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
116; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s7
117; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
118; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s6
119; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:2], v3, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
120; PREGFX10-UNPACKED-NEXT:    s_endpgm
121;
122; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
123; PREGFX10-PACKED:       ; %bb.0: ; %main_body
124; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
125; PREGFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
126; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
127; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
128; PREGFX10-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
129; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
130; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
131; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
132; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
133; PREGFX10-PACKED-NEXT:    s_endpgm
134;
135; GFX10-PACKED-LABEL: tbuffer_store_d16_xyz:
136; GFX10-PACKED:       ; %bb.0: ; %main_body
137; GFX10-PACKED-NEXT:    s_clause 0x2
138; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
139; GFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
140; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
141; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
142; GFX10-PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
143; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
144; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
145; GFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
146; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
147; GFX10-PACKED-NEXT:    s_endpgm
148;
149; GFX11-PACKED-LABEL: tbuffer_store_d16_xyz:
150; GFX11-PACKED:       ; %bb.0: ; %main_body
151; GFX11-PACKED-NEXT:    s_clause 0x2
152; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
153; GFX11-PACKED-NEXT:    s_load_b32 s8, s[4:5], 0x18
154; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
155; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
156; GFX11-PACKED-NEXT:    s_and_b32 s4, s7, 0xffff
157; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
158; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s4
159; GFX11-PACKED-NEXT:    v_mov_b32_e32 v2, s8
160; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
161; GFX11-PACKED-NEXT:    s_endpgm
162main_body:
  ; The mask <0, 1, 2> only reads lanes of %data, so the second operand is a
  ; pure placeholder; poison is used for it rather than undef.
163  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
164  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
165  ret void
166}
167
; Kernel that stores a full <4 x half> payload through the struct.ptr tbuffer
; store intrinsic. The tonga checks expand the payload to one half per VGPR
; (s_lshr_b32 by 16 and s_and_b32 with 0xffff on both dwords) and store from
; v[0:3]; the gfx810/gfx900/gfx1010/gfx1100 checks store the two loaded dwords
; unchanged from v[0:1].
168define amdgpu_kernel void @tbuffer_store_d16_xyzw(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %vindex) {
169; PREGFX10-UNPACKED-LABEL: tbuffer_store_d16_xyzw:
170; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
171; PREGFX10-UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
172; PREGFX10-UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
173; PREGFX10-UNPACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
174; PREGFX10-UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
175; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s7, s5, 16
176; PREGFX10-UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
177; PREGFX10-UNPACKED-NEXT:    s_lshr_b32 s8, s4, 16
178; PREGFX10-UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
179; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
180; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v1, s8
181; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
182; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v3, s7
183; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v4, s6
184; PREGFX10-UNPACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:3], v4, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
185; PREGFX10-UNPACKED-NEXT:    s_endpgm
186;
187; PREGFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
188; PREGFX10-PACKED:       ; %bb.0: ; %main_body
189; PREGFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
190; PREGFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
191; PREGFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
192; PREGFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
193; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
194; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
195; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
196; PREGFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_NUM_FORMAT_USCALED] idxen
197; PREGFX10-PACKED-NEXT:    s_endpgm
198;
199; GFX10-PACKED-LABEL: tbuffer_store_d16_xyzw:
200; GFX10-PACKED:       ; %bb.0: ; %main_body
201; GFX10-PACKED-NEXT:    s_clause 0x2
202; GFX10-PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
203; GFX10-PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
204; GFX10-PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
205; GFX10-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
206; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, s4
207; GFX10-PACKED-NEXT:    v_mov_b32_e32 v1, s5
208; GFX10-PACKED-NEXT:    v_mov_b32_e32 v2, s6
209; GFX10-PACKED-NEXT:    tbuffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_11_11_SSCALED] idxen
210; GFX10-PACKED-NEXT:    s_endpgm
211;
212; GFX11-PACKED-LABEL: tbuffer_store_d16_xyzw:
213; GFX11-PACKED:       ; %bb.0: ; %main_body
214; GFX11-PACKED-NEXT:    s_clause 0x2
215; GFX11-PACKED-NEXT:    s_load_b64 s[6:7], s[4:5], 0x10
216; GFX11-PACKED-NEXT:    s_load_b32 s8, s[4:5], 0x18
217; GFX11-PACKED-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
218; GFX11-PACKED-NEXT:    s_waitcnt lgkmcnt(0)
219; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, s6
220; GFX11-PACKED-NEXT:    v_mov_b32_e32 v1, s7
221; GFX11-PACKED-NEXT:    v_mov_b32_e32 v2, s8
222; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
223; GFX11-PACKED-NEXT:    s_endpgm
224main_body:
225  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
226  ret void
227}
228
; Declarations of the struct.ptr tbuffer store intrinsic for the four payload
; types used by the kernels in this file (half, <2 x half>, <3 x half>,
; <4 x half>). Each takes the payload, a ptr addrspace(8) resource and five
; i32 operands.
229declare void @llvm.amdgcn.struct.ptr.tbuffer.store.f16(half, ptr addrspace(8), i32, i32, i32, i32, i32)
230declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32, i32)
231declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32, i32, i32)
232declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32, i32)
233