; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,UNPACKED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,PACKED %s

; Codegen test for the llvm.amdgcn.struct.ptr.buffer.store.format.* intrinsics
; with 16-bit data (half, <2 x half>, <3 x half>, <4 x half> and i16).
;
; tonga is checked under the UNPACKED prefix: the expected code splits every
; 16-bit component into its own VGPR before the store. gfx810 and gfx900 are
; checked under the PACKED prefix: the expected code passes the components to
; the store two per VGPR. Output common to all three targets uses the shared
; GCN prefix.

; Single half component. All targets are expected to emit identical code.
; The unnamed [8 x i32] arguments place %data and %index at kernarg offsets
; 0x30 and 0x54, which is what the two s_load_dword checks read.
define amdgpu_kernel void @buffer_store_format_d16_x(ptr addrspace(8) %rsrc, [8 x i32], half %data, [8 x i32], i32 %index) {
; GCN-LABEL: buffer_store_format_d16_x:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_load_dword s4, s[8:9], 0x30
; GCN-NEXT:    s_load_dword s5, s[8:9], 0x54
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    buffer_store_format_d16_x v0, v1, s[0:3], 0 idxen
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.store.format.f16(half %data, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
}

; Two half components. The unpacked expectation splits the 32-bit argument
; with a shift and a mask into v[0:1]; the packed expectation stores it
; directly from v0.
define amdgpu_kernel void @buffer_store_format_d16_xy(ptr addrspace(8) %rsrc, <2 x half> %data, i32 %index) {
; UNPACKED-LABEL: buffer_store_format_d16_xy:
; UNPACKED:       ; %bb.0: ; %main_body
; UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
; UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
; UNPACKED-NEXT:    s_lshr_b32 s6, s4, 16
; UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
; UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
; UNPACKED-NEXT:    v_mov_b32_e32 v1, s6
; UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
; UNPACKED-NEXT:    buffer_store_format_d16_xy v[0:1], v2, s[0:3], 0 idxen
; UNPACKED-NEXT:    s_endpgm
;
; PACKED-LABEL: buffer_store_format_d16_xy:
; PACKED:       ; %bb.0: ; %main_body
; PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
; PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; PACKED-NEXT:    s_waitcnt lgkmcnt(0)
; PACKED-NEXT:    v_mov_b32_e32 v0, s4
; PACKED-NEXT:    v_mov_b32_e32 v1, s5
; PACKED-NEXT:    buffer_store_format_d16_xy v0, v1, s[0:3], 0 idxen
; PACKED-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f16(<2 x half> %data, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
}

; Three half components, formed from the first three lanes of a <4 x half>
; kernel argument. The shuffle mask only selects lanes of %data, so the second
; shufflevector operand is an unused placeholder and is written as poison.
define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %index) {
; UNPACKED-LABEL: buffer_store_format_d16_xyz:
; UNPACKED:       ; %bb.0: ; %main_body
; UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
; UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; UNPACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
; UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
; UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
; UNPACKED-NEXT:    s_lshr_b32 s7, s4, 16
; UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
; UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
; UNPACKED-NEXT:    v_mov_b32_e32 v1, s7
; UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
; UNPACKED-NEXT:    v_mov_b32_e32 v3, s6
; UNPACKED-NEXT:    buffer_store_format_d16_xyz v[0:2], v3, s[0:3], 0 idxen
; UNPACKED-NEXT:    s_endpgm
;
; PACKED-LABEL: buffer_store_format_d16_xyz:
; PACKED:       ; %bb.0: ; %main_body
; PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
; PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
; PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; PACKED-NEXT:    s_waitcnt lgkmcnt(0)
; PACKED-NEXT:    s_and_b32 s5, s5, 0xffff
; PACKED-NEXT:    v_mov_b32_e32 v0, s4
; PACKED-NEXT:    v_mov_b32_e32 v1, s5
; PACKED-NEXT:    v_mov_b32_e32 v2, s6
; PACKED-NEXT:    buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 idxen
; PACKED-NEXT:    s_endpgm
main_body:
  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
}

; Four half components. The unpacked expectation spreads them over v[0:3];
; the packed expectation keeps them in v[0:1].
define amdgpu_kernel void @buffer_store_format_d16_xyzw(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %index) {
; UNPACKED-LABEL: buffer_store_format_d16_xyzw:
; UNPACKED:       ; %bb.0: ; %main_body
; UNPACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
; UNPACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; UNPACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
; UNPACKED-NEXT:    s_waitcnt lgkmcnt(0)
; UNPACKED-NEXT:    s_lshr_b32 s7, s5, 16
; UNPACKED-NEXT:    s_and_b32 s5, s5, 0xffff
; UNPACKED-NEXT:    s_lshr_b32 s8, s4, 16
; UNPACKED-NEXT:    s_and_b32 s4, s4, 0xffff
; UNPACKED-NEXT:    v_mov_b32_e32 v0, s4
; UNPACKED-NEXT:    v_mov_b32_e32 v1, s8
; UNPACKED-NEXT:    v_mov_b32_e32 v2, s5
; UNPACKED-NEXT:    v_mov_b32_e32 v3, s7
; UNPACKED-NEXT:    v_mov_b32_e32 v4, s6
; UNPACKED-NEXT:    buffer_store_format_d16_xyzw v[0:3], v4, s[0:3], 0 idxen
; UNPACKED-NEXT:    s_endpgm
;
; PACKED-LABEL: buffer_store_format_d16_xyzw:
; PACKED:       ; %bb.0: ; %main_body
; PACKED-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
; PACKED-NEXT:    s_load_dword s6, s[8:9], 0x18
; PACKED-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; PACKED-NEXT:    s_waitcnt lgkmcnt(0)
; PACKED-NEXT:    v_mov_b32_e32 v0, s4
; PACKED-NEXT:    v_mov_b32_e32 v1, s5
; PACKED-NEXT:    v_mov_b32_e32 v2, s6
; PACKED-NEXT:    buffer_store_format_d16_xyzw v[0:1], v2, s[0:3], 0 idxen
; PACKED-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
}

; Single i16 component. The expected code is the same as for the half case
; above, including the buffer_store_format_d16_x store instruction.
define amdgpu_kernel void @buffer_store_format_i16_x(ptr addrspace(8) %rsrc, [8 x i32], i16 %data, [8 x i32], i32 %index) {
; GCN-LABEL: buffer_store_format_i16_x:
; GCN:       ; %bb.0: ; %main_body
; GCN-NEXT:    s_load_dword s4, s[8:9], 0x30
; GCN-NEXT:    s_load_dword s5, s[8:9], 0x54
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, s4
; GCN-NEXT:    v_mov_b32_e32 v1, s5
; GCN-NEXT:    buffer_store_format_d16_x v0, v1, s[0:3], 0 idxen
; GCN-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.store.format.i16(i16 %data, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
}

declare void @llvm.amdgcn.struct.ptr.buffer.store.format.f16(half, ptr addrspace(8), i32, i32, i32, i32)
declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32)
declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32, i32)
declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32)
declare void @llvm.amdgcn.struct.ptr.buffer.store.format.i16(i16, ptr addrspace(8), i32, i32, i32, i32)