; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,SDAG
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GISEL

; Code generation test for the llvm.amdgcn.struct.ptr.buffer.load.lds intrinsic
; on gfx900. Every function is compiled twice (once without and once with
; -global-isel); expectations shared by both selectors use the common prefix,
; and only the first function needs selector-specific expectations.
;
; As named in the declaration below, the operands are: buffer resource, LDS
; pointer, size, vindex, voffset, soffset, immediate offset, and aux.
; In every expected sequence the LDS pointer (s4) is copied into m0 before the
; buffer load, and the load carries the "lds" modifier.

declare void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)

; Three size-4 loads with a constant vindex of 8, immediate offsets 0/4/8 and
; aux values 0/1/2. The expected output shows aux 1 printed as "glc" and aux 2
; as "slc". The following ordinary LDS load is expected to be preceded by
; s_waitcnt vmcnt(0). The two selectors differ only in the order of the
; initial v_mov/s_mov and in whether an s_nop is emitted after the m0 write.
define amdgpu_ps float @buffer_load_lds_dword(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; SDAG-LABEL: buffer_load_lds_dword:
; SDAG: ; %bb.0: ; %main_body
; SDAG-NEXT: v_mov_b32_e32 v0, 8
; SDAG-NEXT: s_mov_b32 m0, s4
; SDAG-NEXT: s_nop 0
; SDAG-NEXT: buffer_load_dword v0, s[0:3], 0 idxen lds
; SDAG-NEXT: buffer_load_dword v0, s[0:3], 0 idxen offset:4 glc lds
; SDAG-NEXT: buffer_load_dword v0, s[0:3], 0 idxen offset:8 slc lds
; SDAG-NEXT: v_mov_b32_e32 v0, s4
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: ds_read_b32 v0, v0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: ; return to shader part epilog
;
; GISEL-LABEL: buffer_load_lds_dword:
; GISEL: ; %bb.0: ; %main_body
; GISEL-NEXT: s_mov_b32 m0, s4
; GISEL-NEXT: v_mov_b32_e32 v0, 8
; GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 idxen lds
; GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 idxen offset:4 glc lds
; GISEL-NEXT: buffer_load_dword v0, s[0:3], 0 idxen offset:8 slc lds
; GISEL-NEXT: v_mov_b32_e32 v0, s4
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: ds_read_b32 v0, v0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; Variable vindex with an immediate offset of 2048: the expected load uses
; idxen and carries the immediate in its offset field.
define amdgpu_ps void @buffer_load_lds_dword_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_dword_imm_offset:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_dword v0, s[0:3], 0 idxen offset:2048 lds
; GCN-NEXT: s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
  ret void
}

; Variable vindex and variable voffset: the expected load takes the register
; pair v[0:1] and uses both idxen and offen.
define amdgpu_ps void @buffer_load_lds_dword_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
; GCN-LABEL: buffer_load_lds_dword_v_offset:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_dword v[0:1], s[0:3], 0 idxen offen lds
; GCN-NEXT: s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; Variable vindex with an inreg soffset: the expected load passes s5 as its
; scalar offset operand instead of the literal 0 seen in the tests above.
define amdgpu_ps void @buffer_load_lds_dword_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_s_offset:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_dword v0, s[0:3], s5 idxen lds
; GCN-NEXT: s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; Variable vindex, variable voffset and inreg soffset together: expects
; v[0:1] with idxen + offen and s5 as the scalar offset operand.
define amdgpu_ps void @buffer_load_lds_dword_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_vs_offset:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_dword v[0:1], s[0:3], s5 idxen offen lds
; GCN-NEXT: s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; Same operands as the previous test plus an immediate offset of 2048: all
; three offset forms are expected to appear on a single instruction.
define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_vs_imm_offset:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_dword v[0:1], s[0:3], s5 idxen offen offset:2048 lds
; GCN-NEXT: s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}

; Size 2 selects buffer_load_ushort. Here the constant 2048 is passed as the
; voffset operand (not the immediate offset), and the expected output
; materializes it into v1 (0x800) and uses offen rather than the offset field.
define amdgpu_ps void @buffer_load_lds_ushort(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_ushort:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v1, 0x800
; GCN-NEXT: s_mov_b32 m0, s4
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_ushort v[0:1], s[0:3], 0 idxen offen lds
; GCN-NEXT: s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 2, i32 %vindex, i32 2048, i32 0, i32 0, i32 0)
  ret void
}

; Size 1 selects buffer_load_ubyte. The constant 2048 is passed as the
; immediate offset operand and is expected in the instruction's offset field.
define amdgpu_ps void @buffer_load_lds_ubyte(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
; GCN-LABEL: buffer_load_lds_ubyte:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
; GCN-NEXT: s_nop 0
; GCN-NEXT: buffer_load_ubyte v0, s[0:3], 0 idxen offset:2048 lds
; GCN-NEXT: s_endpgm
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 1, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
  ret void
}