; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11 %s

; Scalar bfloat result: uses the .bf16 overload of the raw ptr buffer load
; intrinsic (the suffix must match the bfloat return type). All three i32
; operands are zero.
define bfloat @raw_ptr_buffer_load_bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call bfloat @llvm.amdgcn.raw.ptr.buffer.load.bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret bfloat %val
}

; <2 x bfloat> result: expected to select a single dword-sized buffer load.
define <2 x bfloat> @raw_ptr_buffer_load_v2bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dword v1, off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v2bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <2 x bfloat> %val
}

; <4 x bfloat> result: expected to select a single two-dword buffer load.
define <4 x bfloat> @raw_ptr_buffer_load_v4bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dwordx2 v[2:3], off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <4 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v4bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <4 x bfloat> %val
}

; FIXME: the <6 x bfloat> case below is disabled (commented out).
; NOTE(review): presumably this overload is not yet handled by the backend;
; confirm before re-enabling and regenerating the assertions.
; define <6 x bfloat> @raw_ptr_buffer_load_v6bf16(ptr addrspace(8) inreg %rsrc) {
;   %val = call <6 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v6bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
;   ret <6 x bfloat> %val
; }

; <8 x bfloat> result: expected to select a single four-dword buffer load.
define <8 x bfloat> @raw_ptr_buffer_load_v8bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
; GFX7-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
; GFX7-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <8 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v8bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <8 x bfloat> %val
}