; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=SDAG %s

; Exercises ptr addrspace(8) values (the resource operand of the
; llvm.amdgcn.raw.ptr.buffer.store.* intrinsics) as vector elements and as
; struct members, under both GlobalISel (GISEL) and SelectionDAG (SDAG).

; Loads a <2 x ptr addrspace(8)> from %somewhere, stores element 0
; (reinterpreted as <4 x i32>) through element 1 used as the buffer operand,
; then writes the lane-swapped vector to the next vector slot after %somewhere.
define amdgpu_kernel void @buffer_ptr_vector_ops(ptr addrspace(1) %somewhere) {
; GISEL-LABEL: buffer_ptr_vector_ops:
; GISEL:       ; %bb.0: ; %main_body
; GISEL-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x24
; GISEL-NEXT:    v_mov_b32_e32 v8, 0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    s_load_dwordx8 s[0:7], s[8:9], 0x0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GISEL-NEXT:    v_mov_b32_e32 v4, s4
; GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GISEL-NEXT:    v_mov_b32_e32 v2, s2
; GISEL-NEXT:    v_mov_b32_e32 v3, s3
; GISEL-NEXT:    v_mov_b32_e32 v5, s5
; GISEL-NEXT:    v_mov_b32_e32 v6, s6
; GISEL-NEXT:    v_mov_b32_e32 v7, s7
; GISEL-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GISEL-NEXT:    global_store_dwordx4 v8, v[4:7], s[8:9] offset:32
; GISEL-NEXT:    global_store_dwordx4 v8, v[0:3], s[8:9] offset:48
; GISEL-NEXT:    s_endpgm
;
; SDAG-LABEL: buffer_ptr_vector_ops:
; SDAG:       ; %bb.0: ; %main_body
; SDAG-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x24
; SDAG-NEXT:    v_mov_b32_e32 v8, 0
; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-NEXT:    s_load_dwordx8 s[0:7], s[8:9], 0x0
; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-NEXT:    v_mov_b32_e32 v0, s0
; SDAG-NEXT:    v_mov_b32_e32 v1, s1
; SDAG-NEXT:    v_mov_b32_e32 v2, s2
; SDAG-NEXT:    v_mov_b32_e32 v3, s3
; SDAG-NEXT:    v_mov_b32_e32 v4, s4
; SDAG-NEXT:    v_mov_b32_e32 v5, s5
; SDAG-NEXT:    v_mov_b32_e32 v6, s6
; SDAG-NEXT:    v_mov_b32_e32 v7, s7
; SDAG-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SDAG-NEXT:    global_store_dwordx4 v8, v[0:3], s[8:9] offset:48
; SDAG-NEXT:    global_store_dwordx4 v8, v[4:7], s[8:9] offset:32
; SDAG-NEXT:    s_endpgm
main_body:
  %buffers = load <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere
  %buf1 = extractelement <2 x ptr addrspace(8)> %buffers, i32 0
  %buf2 = extractelement <2 x ptr addrspace(8)> %buffers, i32 1
  ; Reinterpret the first 128-bit pointer as four dwords so it can be the
  ; stored data of the buffer store below.
  %buf1.int = ptrtoint ptr addrspace(8) %buf1 to i128
  %buf1.vec = bitcast i128 %buf1.int to <4 x i32>
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %buf1.vec, ptr addrspace(8) %buf2, i32 0, i32 0, i32 0)
  ; The mask <1, 0> only selects lanes from %buffers, so the second operand is
  ; a pure placeholder; poison is used instead of the deprecated undef.
  %shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> poison, <2 x i32> <i32 1, i32 0>
  %somewhere.next = getelementptr <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere, i64 1
  store <2 x ptr addrspace(8)> %shuffled, ptr addrspace(1) %somewhere.next
  ret void
}

%fat_buffer_struct = type {ptr addrspace(8), i32}

; Takes a %fat_buffer_struct by value, stores its i32 member through its
; ptr addrspace(8) member (using the same i32 as the offset operand), then
; stores the whole struct to %dest indexed by that i32.
define amdgpu_kernel void @buffer_structs(%fat_buffer_struct %arg, ptr addrspace(1) %dest) {
; GISEL-LABEL: buffer_structs:
; GISEL:       ; %bb.0: ; %main_body
; GISEL-NEXT:    s_load_dword s6, s[4:5], 0x34
; GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
; GISEL-NEXT:    v_mov_b32_e32 v5, 0
; GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-NEXT:    s_ashr_i32 s7, s6, 31
; GISEL-NEXT:    s_lshl_b64 s[4:5], s[6:7], 5
; GISEL-NEXT:    s_add_u32 s4, s8, s4
; GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GISEL-NEXT:    v_mov_b32_e32 v4, s6
; GISEL-NEXT:    s_addc_u32 s5, s9, s5
; GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GISEL-NEXT:    v_mov_b32_e32 v2, s2
; GISEL-NEXT:    v_mov_b32_e32 v3, s3
; GISEL-NEXT:    buffer_store_dword v4, v4, s[0:3], 0 offen
; GISEL-NEXT:    global_store_dwordx4 v5, v[0:3], s[4:5]
; GISEL-NEXT:    global_store_dword v5, v4, s[4:5] offset:16
; GISEL-NEXT:    s_endpgm
;
; SDAG-LABEL: buffer_structs:
; SDAG:       ; %bb.0: ; %main_body
; SDAG-NEXT:    s_load_dword s6, s[4:5], 0x34
; SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; SDAG-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x44
; SDAG-NEXT:    v_mov_b32_e32 v4, 0
; SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-NEXT:    s_ashr_i32 s7, s6, 31
; SDAG-NEXT:    s_lshl_b64 s[4:5], s[6:7], 5
; SDAG-NEXT:    s_add_u32 s4, s8, s4
; SDAG-NEXT:    v_mov_b32_e32 v0, s6
; SDAG-NEXT:    s_addc_u32 s5, s9, s5
; SDAG-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
; SDAG-NEXT:    global_store_dword v4, v0, s[4:5] offset:16
; SDAG-NEXT:    v_mov_b32_e32 v0, s0
; SDAG-NEXT:    v_mov_b32_e32 v1, s1
; SDAG-NEXT:    v_mov_b32_e32 v2, s2
; SDAG-NEXT:    v_mov_b32_e32 v3, s3
; SDAG-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
; SDAG-NEXT:    s_endpgm
main_body:
  %buffer = extractvalue %fat_buffer_struct %arg, 0
  %offset = extractvalue %fat_buffer_struct %arg, 1
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %offset, ptr addrspace(8) %buffer, i32 %offset, i32 0, i32 0)
  ; Confirm the GEP index is scaled by 32 bytes per %fat_buffer_struct element
  ; (the s_lshl_b64 ..., 5 in the checks above), i.e. the struct's padded
  ; size: the pointer is stored at offset 0 and the i32 at offset 16.
  %dest.next = getelementptr %fat_buffer_struct, ptr addrspace(1) %dest, i32 %offset
  store %fat_buffer_struct %arg, ptr addrspace(1) %dest.next
  ret void
}

declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32 immarg)