xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s
3; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
4; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
5; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
6; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
7; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s
8
; Scalar d16 case: calls llvm.amdgcn.struct.tbuffer.load.f16 on %rsrc with the
; constant operands (0, 0, 0, 22, 0) and returns the loaded half unchanged.
; Every llc run is expected to zero v0, issue one single-component d16 typed
; buffer load with idxen into v0, and wait for it before the shader-part epilog.
; Differences visible in the expected output:
;   - the i32 22 operand prints as BUF_DATA_FORMAT_10_11_11 with
;     BUF_NUM_FORMAT_SNORM on the tonga/gfx810/gfx900 runs and as
;     BUF_FMT_32_FLOAT on the gfx1010/gfx1100/gfx1200 runs;
;   - gfx1100 and gfx1200 spell the mnemonic tbuffer_load_d16_format_x rather
;     than tbuffer_load_format_d16_x;
;   - gfx1200 prints "null" where the other runs print 0 after s[0:3], and waits
;     with s_wait_loadcnt instead of s_waitcnt vmcnt(0);
;   - only the tonga run passes -show-mc-encoding, so only its check lines carry
;     "; encoding: [...]" suffixes.
9define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
10; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
11; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
12; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
13; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0x20,0xb4,0xe8,0x00,0x00,0x00,0x80]
14; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
15; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
16;
17; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x:
18; PREGFX10-PACKED:       ; %bb.0: ; %main_body
19; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
20; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
21; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
22; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
23;
24; GFX10-PACKED-LABEL: tbuffer_load_d16_x:
25; GFX10-PACKED:       ; %bb.0: ; %main_body
26; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
27; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
28; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
29; GFX10-PACKED-NEXT:    ; return to shader part epilog
30;
31; GFX11-PACKED-LABEL: tbuffer_load_d16_x:
32; GFX11-PACKED:       ; %bb.0: ; %main_body
33; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, 0
34; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
35; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
36; GFX11-PACKED-NEXT:    ; return to shader part epilog
37;
38; GFX12-PACKED-LABEL: tbuffer_load_d16_x:
39; GFX12-PACKED:       ; %bb.0: ; %main_body
40; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, 0
41; GFX12-PACKED-NEXT:    tbuffer_load_d16_format_x v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
42; GFX12-PACKED-NEXT:    s_wait_loadcnt 0x0
43; GFX12-PACKED-NEXT:    ; return to shader part epilog
44main_body:
45  %data = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
46  ret half %data
47}
48
; Two-component d16 case: loads a <2 x half> through
; llvm.amdgcn.struct.tbuffer.load.v2f16 (same constant operands 0, 0, 0, 22, 0)
; and returns element 1.
; Expected result layout, as shown by the check lines:
;   - the tonga run loads into the register pair v[0:1] and returns the element
;     by copying v1 into v0;
;   - all other runs load into the single register v0 and return the element by
;     shifting v0 right by 16 bits, i.e. it sits in the high half of v0.
; NOTE(review): the unpacked-vs-packed d16 register layout is inferred from the
; expected output and the check-prefix names; confirm against the AMDGPU docs.
49define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
50; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy:
51; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
52; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
53; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xy v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0xa0,0xb4,0xe8,0x00,0x00,0x00,0x80]
54; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
55; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
56; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
57;
58; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy:
59; PREGFX10-PACKED:       ; %bb.0: ; %main_body
60; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
61; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xy v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
62; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
63; PREGFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
64; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
65;
66; GFX10-PACKED-LABEL: tbuffer_load_d16_xy:
67; GFX10-PACKED:       ; %bb.0: ; %main_body
68; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
69; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xy v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
70; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
71; GFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
72; GFX10-PACKED-NEXT:    ; return to shader part epilog
73;
74; GFX11-PACKED-LABEL: tbuffer_load_d16_xy:
75; GFX11-PACKED:       ; %bb.0: ; %main_body
76; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, 0
77; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xy v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
78; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
79; GFX11-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
80; GFX11-PACKED-NEXT:    ; return to shader part epilog
81;
82; GFX12-PACKED-LABEL: tbuffer_load_d16_xy:
83; GFX12-PACKED:       ; %bb.0: ; %main_body
84; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, 0
85; GFX12-PACKED-NEXT:    tbuffer_load_d16_format_xy v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
86; GFX12-PACKED-NEXT:    s_wait_loadcnt 0x0
87; GFX12-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
88; GFX12-PACKED-NEXT:    ; return to shader part epilog
89main_body:
90  %data = call <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
91  %elt = extractelement <2 x half> %data, i32 1
92  ret half %elt
93}
94
; Three-component d16 case: loads a <3 x half> through
; llvm.amdgcn.struct.tbuffer.load.v3f16 (same constant operands 0, 0, 0, 22, 0)
; and returns element 2.
; Expected result layout, as shown by the check lines:
;   - the tonga run loads into v[0:2] and returns the element by copying v2
;     into v0;
;   - all other runs load into v[0:1] and return the element by copying v1 into
;     v0 with no shift, i.e. it sits in the low half of the second register.
95define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
96; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz:
97; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
98; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
99; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:2], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0x20,0xb5,0xe8,0x00,0x00,0x00,0x80]
100; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
101; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
102; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
103;
104; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
105; PREGFX10-PACKED:       ; %bb.0: ; %main_body
106; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
107; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
108; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
109; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, v1
110; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
111;
112; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
113; GFX10-PACKED:       ; %bb.0: ; %main_body
114; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
115; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
116; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
117; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, v1
118; GFX10-PACKED-NEXT:    ; return to shader part epilog
119;
120; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
121; GFX11-PACKED:       ; %bb.0: ; %main_body
122; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, 0
123; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
124; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
125; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, v1
126; GFX11-PACKED-NEXT:    ; return to shader part epilog
127;
128; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz:
129; GFX12-PACKED:       ; %bb.0: ; %main_body
130; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, 0
131; GFX12-PACKED-NEXT:    tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
132; GFX12-PACKED-NEXT:    s_wait_loadcnt 0x0
133; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, v1
134; GFX12-PACKED-NEXT:    ; return to shader part epilog
135main_body:
136  %data = call <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
137  %elt = extractelement <3 x half> %data, i32 2
138  ret half %elt
139}
140
; Four-component d16 case: loads a <4 x half> through
; llvm.amdgcn.struct.tbuffer.load.v4f16 (same constant operands 0, 0, 0, 22, 0)
; and returns element 3.
; Expected result layout, as shown by the check lines:
;   - the tonga run loads into v[0:3] and returns the element by copying v3
;     into v0;
;   - all other runs load into v[0:1] and return the element by shifting v1
;     right by 16 bits into v0, i.e. it sits in the high half of the second
;     register.
141define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
142; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw:
143; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
144; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
145; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0xa0,0xb5,0xe8,0x00,0x00,0x00,0x80]
146; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
147; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
148; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
149;
150; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
151; PREGFX10-PACKED:       ; %bb.0: ; %main_body
152; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
153; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
154; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
155; PREGFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
156; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
157;
158; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
159; GFX10-PACKED:       ; %bb.0: ; %main_body
160; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
161; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
162; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
163; GFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
164; GFX10-PACKED-NEXT:    ; return to shader part epilog
165;
166; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw:
167; GFX11-PACKED:       ; %bb.0: ; %main_body
168; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, 0
169; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
170; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
171; GFX11-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
172; GFX11-PACKED-NEXT:    ; return to shader part epilog
173;
174; GFX12-PACKED-LABEL: tbuffer_load_d16_xyzw:
175; GFX12-PACKED:       ; %bb.0: ; %main_body
176; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, 0
177; GFX12-PACKED-NEXT:    tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
178; GFX12-PACKED-NEXT:    s_wait_loadcnt 0x0
179; GFX12-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
180; GFX12-PACKED-NEXT:    ; return to shader part epilog
181main_body:
182  %data = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
183  %elt = extractelement <4 x half> %data, i32 3
184  ret half %elt
185}
186
; Declarations of the llvm.amdgcn.struct.tbuffer.load overloads called by the
; test functions in this file: one per result type (half, <2 x half>,
; <3 x half>, <4 x half>), each taking a <4 x i32> followed by five i32
; operands.
187declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32)
188declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32)
189declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32, i32)
190declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32)
191