xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.bf16.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11 %s

; Scalar bfloat load; all three i32 operands of the intrinsic are 0.
; The call returns bfloat, so the overloaded intrinsic name must carry the
; matching .bf16 suffix. The 16-bit loads expected below (buffer_load_ushort,
; buffer_load_u16) correspond to this scalar form. On GFX7 the checks show the
; loaded value additionally shifted left by 16; the other targets return the
; loaded value without further instructions.
define bfloat @raw_ptr_buffer_load_bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call bfloat @llvm.amdgcn.raw.ptr.buffer.load.bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret bfloat %val
}

; <2 x bfloat> load; all three i32 operands of the intrinsic are 0.
; On GFX7 the checks show the loaded dword being split across two registers
; (shift left by 16, and mask with 0xffff0000); on the other targets the
; loaded dword is returned without further instructions.
define <2 x bfloat> @raw_ptr_buffer_load_v2bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dword v1, off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v2bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <2 x bfloat> %val
}

; <4 x bfloat> load; all three i32 operands of the intrinsic are 0.
; On GFX7 the checks show the two loaded dwords being split across four
; registers (shift left by 16, and mask with 0xffff0000); on the other
; targets the loaded register pair is returned without further instructions.
define <4 x bfloat> @raw_ptr_buffer_load_v4bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dwordx2 v[2:3], off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <4 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v4bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <4 x bfloat> %val
}

; FIXME: The <6 x bfloat> variant below is disabled (commented out) and
; still needs to be enabled.
; define <6 x bfloat> @raw_ptr_buffer_load_v6bf16(ptr addrspace(8) inreg %rsrc) {
;   %val = call <6 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v6bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
;   ret <6 x bfloat> %val
; }

; <8 x bfloat> load; all three i32 operands of the intrinsic are 0.
; On GFX7 the checks show the four loaded dwords being split across eight
; registers (shift left by 16, and mask with 0xffff0000); on the other
; targets the loaded register quad is returned without further instructions.
define <8 x bfloat> @raw_ptr_buffer_load_v8bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
; GFX7-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
; GFX7-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <8 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v8bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <8 x bfloat> %val
}
