; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11 %s

; Exercises selection of the llvm.amdgcn.raw.ptr.buffer.load intrinsic for
; bfloat and bfloat-vector result types on five subtargets (hawaii, tonga,
; gfx900, gfx1010, gfx1100). Every call passes zero for the three trailing
; i32 operands and takes the resource descriptor as an inreg argument.

; Scalar bfloat result: the checks expect a single 16-bit buffer load. The
; hawaii output additionally shifts the loaded value left by 16.
define bfloat @raw_ptr_buffer_load_bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_ushort v0, off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call bfloat @llvm.amdgcn.raw.ptr.buffer.load.bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret bfloat %val
}

; <2 x bfloat> result: the checks expect one dword load. The hawaii output
; splits the dword into two registers with a shift and a mask.
define <2 x bfloat> @raw_ptr_buffer_load_v2bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dword v1, off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dword v0, off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v2bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v2bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <2 x bfloat> %val
}

; <4 x bfloat> result: the checks expect one 64-bit load. The hawaii output
; splits each loaded dword into two registers with a shift and a mask.
define <4 x bfloat> @raw_ptr_buffer_load_v4bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dwordx2 v[2:3], off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v4bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <4 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v4bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <4 x bfloat> %val
}

; FIXME: The <6 x bfloat> case below is disabled. The reason is not recorded
; in this file; re-enable it and regenerate the checks once it is handled.
; define <6 x bfloat> @raw_ptr_buffer_load_v6bf16(ptr addrspace(8) inreg %rsrc) {
;   %val = call <6 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v6bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
;   ret <6 x bfloat> %val
; }

; <8 x bfloat> result: the checks expect one 128-bit load. The hawaii output
; splits each loaded dword into two registers with a shift and a mask.
define <8 x bfloat> @raw_ptr_buffer_load_v8bf16(ptr addrspace(8) inreg %rsrc) {
; GFX7-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    buffer_load_dwordx4 v[4:7], off, s[16:19], 0
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
; GFX7-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
; GFX7-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; GFX7-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
; GFX7-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
; GFX7-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
; GFX7-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
; GFX7-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[16:19], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: raw_ptr_buffer_load_v8bf16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = call <8 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v8bf16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret <8 x bfloat> %val
}