xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll (revision ba52f06f9d92c7ca04b440f618f8d352ea121fcc)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s
3; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
4; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
5; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
6; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
7; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s
8
; Scalar case: returns the half produced by llvm.amdgcn.raw.tbuffer.load.f16
; when called with %rsrc and the constant operands 0, 0, 22, 0.
; Every check block below expects a single d16 "x" tbuffer load into v0
; followed by a wait, with no extra move or shift before the return.
; The load's format is printed as
; [BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] in the pre-GFX10 blocks and
; as [BUF_FMT_32_FLOAT] in the GFX10, GFX11 and GFX12 blocks. The GFX11 and
; GFX12 blocks spell the mnemonic tbuffer_load_d16_format_x, and the GFX12
; block uses "null" where the others print 0 and waits with s_wait_loadcnt.
; NOTE(review): presumably the printed format is the decoding of the i32 22
; operand -- confirm against the intrinsic definition.
9define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
10; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
11; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
12; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb4,0xe8,0x00,0x00,0x00,0x80]
13; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
14; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
15;
16; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x:
17; PREGFX10-PACKED:       ; %bb.0: ; %main_body
18; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
19; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
20; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
21;
22; GFX10-PACKED-LABEL: tbuffer_load_d16_x:
23; GFX10-PACKED:       ; %bb.0: ; %main_body
24; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
25; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
26; GFX10-PACKED-NEXT:    ; return to shader part epilog
27;
28; GFX11-PACKED-LABEL: tbuffer_load_d16_x:
29; GFX11-PACKED:       ; %bb.0: ; %main_body
30; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
31; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
32; GFX11-PACKED-NEXT:    ; return to shader part epilog
33;
34; GFX12-PACKED-LABEL: tbuffer_load_d16_x:
35; GFX12-PACKED:       ; %bb.0: ; %main_body
36; GFX12-PACKED-NEXT:    tbuffer_load_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_32_FLOAT]
37; GFX12-PACKED-NEXT:    s_wait_loadcnt 0x0
38; GFX12-PACKED-NEXT:    ; return to shader part epilog
39main_body:
40  %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
41  ret half %data
42}
43
; Two-component case: loads a <2 x half> through
; llvm.amdgcn.raw.tbuffer.load.v2f16 (operands %rsrc, 0, 0, 22, 0) and
; returns element 1, i.e. the last component.
; Expected code differs by register layout:
;   - the unpacked (tonga) block loads into v[0:1] and copies v1 to v0;
;   - the packed blocks load into the single register v0 and shift it right
;     by 16 to bring the upper half down.
; The GFX11 and GFX12 blocks spell the mnemonic tbuffer_load_d16_format_xy,
; and the GFX12 block uses "null" where the others print 0 and waits with
; s_wait_loadcnt.
44define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
45; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy:
46; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
47; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb4,0xe8,0x00,0x00,0x00,0x80]
48; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
49; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
50; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
51;
52; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy:
53; PREGFX10-PACKED:       ; %bb.0: ; %main_body
54; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
55; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
56; PREGFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
57; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
58;
59; GFX10-PACKED-LABEL: tbuffer_load_d16_xy:
60; GFX10-PACKED:       ; %bb.0: ; %main_body
61; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
62; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
63; GFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
64; GFX10-PACKED-NEXT:    ; return to shader part epilog
65;
66; GFX11-PACKED-LABEL: tbuffer_load_d16_xy:
67; GFX11-PACKED:       ; %bb.0: ; %main_body
68; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
69; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
70; GFX11-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
71; GFX11-PACKED-NEXT:    ; return to shader part epilog
72;
73; GFX12-PACKED-LABEL: tbuffer_load_d16_xy:
74; GFX12-PACKED:       ; %bb.0: ; %main_body
75; GFX12-PACKED-NEXT:    tbuffer_load_d16_format_xy v0, off, s[0:3], null format:[BUF_FMT_32_FLOAT]
76; GFX12-PACKED-NEXT:    s_wait_loadcnt 0x0
77; GFX12-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
78; GFX12-PACKED-NEXT:    ; return to shader part epilog
79main_body:
80  %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
81  %elt = extractelement <2 x half> %data, i32 1
82  ret half %elt
83}
84
; Three-component case: loads a <3 x half> through
; llvm.amdgcn.raw.tbuffer.load.v3f16 (operands %rsrc, 0, 0, 22, 0) and
; returns element 2, i.e. the last component.
; Expected code differs by register layout:
;   - the unpacked (tonga) block loads into v[0:2] and copies v2 to v0;
;   - the packed blocks load into v[0:1] and copy v1 to v0 with a plain
;     move (no shift is expected for this element).
; The GFX11 and GFX12 blocks spell the mnemonic tbuffer_load_d16_format_xyz,
; and the GFX12 block uses "null" where the others print 0 and waits with
; s_wait_loadcnt.
85define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
86; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz:
87; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
88; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:2], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb5,0xe8,0x00,0x00,0x00,0x80]
89; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
90; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
91; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
92;
93; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
94; PREGFX10-PACKED:       ; %bb.0: ; %main_body
95; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
96; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
97; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, v1
98; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
99;
100; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
101; GFX10-PACKED:       ; %bb.0: ; %main_body
102; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
103; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
104; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, v1
105; GFX10-PACKED-NEXT:    ; return to shader part epilog
106;
107; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
108; GFX11-PACKED:       ; %bb.0: ; %main_body
109; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
110; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
111; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, v1
112; GFX11-PACKED-NEXT:    ; return to shader part epilog
113;
114; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz:
115; GFX12-PACKED:       ; %bb.0: ; %main_body
116; GFX12-PACKED-NEXT:    tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
117; GFX12-PACKED-NEXT:    s_wait_loadcnt 0x0
118; GFX12-PACKED-NEXT:    v_mov_b32_e32 v0, v1
119; GFX12-PACKED-NEXT:    ; return to shader part epilog
120main_body:
121  %data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
122  %elt = extractelement <3 x half> %data, i32 2
123  ret half %elt
124}
125
; Four-component case: loads a <4 x half> through
; llvm.amdgcn.raw.tbuffer.load.v4f16 (operands %rsrc, 0, 0, 22, 0) and
; returns element 3, i.e. the last component.
; Expected code differs by register layout:
;   - the unpacked (tonga) block loads into v[0:3] and copies v3 to v0;
;   - the packed blocks load into v[0:1] and produce v0 by shifting v1 right
;     by 16 to bring the upper half down.
; The GFX11 and GFX12 blocks spell the mnemonic tbuffer_load_d16_format_xyzw,
; and the GFX12 block uses "null" where the others print 0 and waits with
; s_wait_loadcnt.
126define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
127; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw:
128; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
129; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb5,0xe8,0x00,0x00,0x00,0x80]
130; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
131; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
132; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
133;
134; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
135; PREGFX10-PACKED:       ; %bb.0: ; %main_body
136; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
137; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
138; PREGFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
139; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
140;
141; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
142; GFX10-PACKED:       ; %bb.0: ; %main_body
143; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
144; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
145; GFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
146; GFX10-PACKED-NEXT:    ; return to shader part epilog
147;
148; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw:
149; GFX11-PACKED:       ; %bb.0: ; %main_body
150; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
151; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
152; GFX11-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
153; GFX11-PACKED-NEXT:    ; return to shader part epilog
154;
155; GFX12-PACKED-LABEL: tbuffer_load_d16_xyzw:
156; GFX12-PACKED:       ; %bb.0: ; %main_body
157; GFX12-PACKED-NEXT:    tbuffer_load_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
158; GFX12-PACKED-NEXT:    s_wait_loadcnt 0x0
159; GFX12-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
160; GFX12-PACKED-NEXT:    ; return to shader part epilog
161main_body:
162  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
163  %elt = extractelement <4 x half> %data, i32 3
164  ret half %elt
165}
166
; The four overloads of llvm.amdgcn.raw.tbuffer.load exercised in this file,
; one per result type (half, <2 x half>, <3 x half>, <4 x half>). Each takes
; a <4 x i32> followed by four i32 operands; every call in this file passes
; the function's %rsrc argument and the constants 0, 0, 22, 0.
; NOTE(review): presumably the operands are (rsrc, offset, soffset, format,
; aux/cache policy) -- confirm against the intrinsic definition.
167declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32)
168declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32)
169declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32)
170declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32)
171