; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s

; The first two invocations check the gfx950 output for both instruction
; selectors; the last two expect llc to crash when targeting gfx940.

; FIXME: Not a great error
; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand!
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.raw.ptr.buffer.load.lds),

declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)

;---------------------------------------------------------------------
; dwordx3
;---------------------------------------------------------------------

; Three back-to-back LDS loads at immediate offsets 0, 4 and 8 with aux values
; 0, 1 and 2, followed by a float load from the same LDS pointer.
; NOTE(review): every call below passes size 4 and the expected output is
; buffer_load_dword, so this function does not exercise a 12-byte load despite
; its name. Confirm whether size 12 was intended, then regenerate the checks.
define amdgpu_ps float @buffer_load_lds_dwordx3(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx3:
; GFX950:       ; %bb.0: ; %main_body
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dword off, s[0:3], 0 lds
; GFX950-NEXT:    buffer_load_dword off, s[0:3], 0 offset:4 sc0 lds
; GFX950-NEXT:    buffer_load_dword off, s[0:3], 0 offset:8 nt lds
; GFX950-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    ds_read_b32 v0, v0
; GFX950-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; Size 12 with a constant voffset of 2048; the expected output materializes
; 0x800 in v0 and uses offen addressing.
define amdgpu_ps void @buffer_load_lds_dwordx3_imm_voffset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx3_imm_voffset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 2048, i32 0, i32 0, i32 0)
  ret void
}

; Size 12 with voffset taken from a non-inreg argument.
define amdgpu_ps void @buffer_load_lds_dwordx3_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_v_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; Size 12 with soffset taken from an inreg argument.
define amdgpu_ps void @buffer_load_lds_dwordx3_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_s_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 off, s[0:3], s5 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; Size 12 with both voffset and soffset supplied as arguments.
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], s5 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; Size 12 with voffset, soffset and an immediate offset of 2048; the expected
; output carries the immediate in the instruction's offset field.
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_imm_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], s5 offen offset:2048 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}

;---------------------------------------------------------------------
; dwordx4
;---------------------------------------------------------------------

; Three back-to-back LDS loads at immediate offsets 0, 4 and 8 with aux values
; 0, 1 and 2, followed by a float load from the same LDS pointer.
; NOTE(review): this body is identical to @buffer_load_lds_dwordx3 above. Every
; call passes size 4 and the expected output is buffer_load_dword, so no
; 16-byte load is exercised here. Confirm whether size 16 was intended, then
; regenerate the checks.
define amdgpu_ps float @buffer_load_lds_dwordx4(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx4:
; GFX950:       ; %bb.0: ; %main_body
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dword off, s[0:3], 0 lds
; GFX950-NEXT:    buffer_load_dword off, s[0:3], 0 offset:4 sc0 lds
; GFX950-NEXT:    buffer_load_dword off, s[0:3], 0 offset:8 nt lds
; GFX950-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-NEXT:    s_waitcnt vmcnt(0)
; GFX950-NEXT:    ds_read_b32 v0, v0
; GFX950-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-NEXT:    ; return to shader part epilog
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; Size 16 with a constant voffset of 2048; the expected output materializes
; 0x800 in v0 and uses offen addressing.
define amdgpu_ps void @buffer_load_lds_dwordx4_imm_voffset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-LABEL: buffer_load_lds_dwordx4_imm_voffset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    v_mov_b32_e32 v0, 0x800
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 2048, i32 0, i32 0, i32 0)
  ret void
}

; Size 16 with voffset taken from a non-inreg argument.
define amdgpu_ps void @buffer_load_lds_dwordx4_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_v_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; Size 16 with soffset taken from an inreg argument.
define amdgpu_ps void @buffer_load_lds_dwordx4_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_s_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 off, s[0:3], s5 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; Size 16 with both voffset and soffset supplied as arguments.
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], s5 offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; Size 16 with voffset, soffset and an immediate offset of 2048; the expected
; output carries the immediate in the instruction's offset field.
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_imm_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], s5 offen offset:2048 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX950-GISEL: {{.*}}
; GFX950-SDAG: {{.*}}