xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.bf16.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11 %s

; Scalar bfloat load through a raw pointer buffer resource passed inreg.
; Every checked target selects a single 16-bit buffer load; GFX7 additionally
; shifts the loaded value left by 16 before returning it in v0.
define bfloat @raw_ptr_buffer_load_bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; The overload suffix names the return type: scalar bfloat is "bf16".
  %val = call bfloat @llvm.amdgcn.raw.ptr.buffer.load.bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret bfloat %val
}

; <2 x bfloat> load through a raw pointer buffer resource passed inreg.
; Every checked target selects one 32-bit buffer load; GFX7 then splits the
; dword into v0 (shifted left by 16) and v1 (masked with 0xffff0000).
define <2 x bfloat> @raw_ptr_buffer_load_v2bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dword v1, off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %loaded = call <2 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v2bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <2 x bfloat> %loaded
}

; <4 x bfloat> load through a raw pointer buffer resource passed inreg.
; Every checked target selects one 64-bit buffer load; GFX7 then expands each
; loaded dword into two result registers (shift left by 16, mask 0xffff0000).
define <4 x bfloat> @raw_ptr_buffer_load_v4bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dwordx2 v[2:3], off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %loaded = call <4 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v4bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <4 x bfloat> %loaded
}

; FIXME: The <6 x bfloat> case below is disabled (presumably not handled yet;
; confirm before re-enabling). Uncomment it and regenerate the checks once it
; works.
; define <6 x bfloat> @raw_ptr_buffer_load_v6bf16(ptr addrspace(8) inreg %rsrc) {
;   %val = call <6 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v6bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
;   ret <6 x bfloat> %val
; }

; <8 x bfloat> load through a raw pointer buffer resource passed inreg.
; Every checked target selects one 128-bit buffer load; GFX7 then expands each
; loaded dword into two result registers (shift left by 16, mask 0xffff0000).
define <8 x bfloat> @raw_ptr_buffer_load_v8bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
; GFX7-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
; GFX7-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %loaded = call <8 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v8bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <8 x bfloat> %loaded
}
184