; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck -check-prefixes=GFX950,GFX950-GISEL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-SDAG %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx940 -filetype=null < %s 2>&1 | FileCheck -check-prefix=ERR-GISEL %s

; ERR-SDAG: LLVM ERROR: Do not know how to expand this operator's operand!
; ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.struct.ptr.buffer.load.lds),

; Overview of this test:
;  * The two gfx950 invocations compile every function below, once with
;    -global-isel=0 and once with -global-isel=1. Functions whose expected
;    output is the same for both share the common prefix; the two functions
;    that load back from LDS have separate expectations per selector.
;  * The two gfx940 invocations are expected to crash (not --crash) with the
;    error text listed above, one message per selector.
;  * Every call passes a size operand of 12 (expected to select
;    buffer_load_dwordx3 ... lds) or 16 (expected to select
;    buffer_load_dwordx4 ... lds).

; Intrinsic under test. Operand order, using the names given in this
; declaration: rsrc, <unnamed LDS pointer>, size, vindex, voffset, soffset,
; offset, aux. All calls in this file pass literal constants for size, offset
; and aux.
declare void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)

;---------------------------------------------------------------------
; dwordx3
;---------------------------------------------------------------------

; Three size-12 loads with constant vindex 8, zero voffset/soffset, offsets
; 0/4/8 and aux values 0/1/2, followed by a float load from %lds that is
; returned. The expected output shows the aux=1 call printed with "sc0", the
; aux=2 call printed with "nt", and an s_waitcnt vmcnt(0) before the ds_read.
define amdgpu_ps float @buffer_load_lds_dwordx3(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-SDAG-LABEL: buffer_load_lds_dwordx3:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 8
; GFX950-SDAG-NEXT:    s_mov_b32 m0, s4
; GFX950-SDAG-NEXT:    s_nop 0
; GFX950-SDAG-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 idxen lds
; GFX950-SDAG-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:4 sc0 lds
; GFX950-SDAG-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:8 nt lds
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT:    ds_read_b32 v0, v0
; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-SDAG-NEXT:    ; return to shader part epilog
;
; GFX950-GISEL-LABEL: buffer_load_lds_dwordx3:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    s_mov_b32 m0, s4
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v0, 8
; GFX950-GISEL-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 idxen lds
; GFX950-GISEL-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:4 sc0 lds
; GFX950-GISEL-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:8 nt lds
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX950-GISEL-NEXT:    ds_read_b32 v0, v0
; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-GISEL-NEXT:    ; return to shader part epilog
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 8, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; Size 12, non-constant vindex, constant offset 2048: expected to appear as
; "offset:2048" on an idxen load.
define amdgpu_ps void @buffer_load_lds_dwordx3_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
; GFX950-LABEL: buffer_load_lds_dwordx3_imm_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], 0 idxen offset:2048 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
  ret void
}

; Size 12, non-constant vindex and voffset: expected to use the v[0:1] pair
; with both idxen and offen.
define amdgpu_ps void @buffer_load_lds_dwordx3_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_v_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v[0:1], s[0:3], 0 idxen offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; Size 12, non-constant vindex plus an inreg soffset: expected to pass s5 as
; the scalar offset operand.
define amdgpu_ps void @buffer_load_lds_dwordx3_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_s_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v0, s[0:3], s5 idxen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; Size 12, non-constant vindex and voffset plus an inreg soffset: expected to
; combine v[0:1] (idxen offen) with s5 as the scalar offset operand.
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v[0:1], s[0:3], s5 idxen offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; Same operands as the vs_offset case above, with a constant offset of 2048
; added: expected to appear as "offset:2048" on the same instruction form.
define amdgpu_ps void @buffer_load_lds_dwordx3_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx3_vs_imm_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx3 v[0:1], s[0:3], s5 idxen offen offset:2048 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 12, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}

;---------------------------------------------------------------------
; dwordx4
;---------------------------------------------------------------------

; Three size-16 loads with constant vindex 8, zero voffset/soffset, offsets
; 0/4/8 and aux values 0/1/2, followed by a float load from %lds that is
; returned. The expected output shows the aux=1 call printed with "sc0", the
; aux=2 call printed with "nt", and an s_waitcnt vmcnt(0) before the ds_read.
define amdgpu_ps float @buffer_load_lds_dwordx4(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GFX950-SDAG-LABEL: buffer_load_lds_dwordx4:
; GFX950-SDAG:       ; %bb.0:
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, 8
; GFX950-SDAG-NEXT:    s_mov_b32 m0, s4
; GFX950-SDAG-NEXT:    s_nop 0
; GFX950-SDAG-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 idxen lds
; GFX950-SDAG-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:4 sc0 lds
; GFX950-SDAG-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:8 nt lds
; GFX950-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT:    ds_read_b32 v0, v0
; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-SDAG-NEXT:    ; return to shader part epilog
;
; GFX950-GISEL-LABEL: buffer_load_lds_dwordx4:
; GFX950-GISEL:       ; %bb.0:
; GFX950-GISEL-NEXT:    s_mov_b32 m0, s4
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v0, 8
; GFX950-GISEL-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 idxen lds
; GFX950-GISEL-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:4 sc0 lds
; GFX950-GISEL-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:8 nt lds
; GFX950-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX950-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX950-GISEL-NEXT:    ds_read_b32 v0, v0
; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-GISEL-NEXT:    ; return to shader part epilog
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 8, i32 0, i32 0, i32 8, i32 2)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; Size 16, non-constant vindex, constant offset 2048: expected to appear as
; "offset:2048" on an idxen load.
define amdgpu_ps void @buffer_load_lds_dwordx4_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
; GFX950-LABEL: buffer_load_lds_dwordx4_imm_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], 0 idxen offset:2048 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
  ret void
}

; Size 16, non-constant vindex and voffset: expected to use the v[0:1] pair
; with both idxen and offen.
define amdgpu_ps void @buffer_load_lds_dwordx4_v_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_v_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v[0:1], s[0:3], 0 idxen offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0)
  ret void
}

; Size 16, non-constant vindex plus an inreg soffset: expected to pass s5 as
; the scalar offset operand.
define amdgpu_ps void @buffer_load_lds_dwordx4_s_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_s_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v0, s[0:3], s5 idxen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
  ret void
}

; Size 16, non-constant vindex and voffset plus an inreg soffset: expected to
; combine v[0:1] (idxen offen) with s5 as the scalar offset operand.
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v[0:1], s[0:3], s5 idxen offen lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
  ret void
}

; Same operands as the vs_offset case above, with a constant offset of 2048
; added: expected to appear as "offset:2048" on the same instruction form.
define amdgpu_ps void @buffer_load_lds_dwordx4_vs_imm_offset(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
; GFX950-LABEL: buffer_load_lds_dwordx4_vs_imm_offset:
; GFX950:       ; %bb.0:
; GFX950-NEXT:    s_mov_b32 m0, s4
; GFX950-NEXT:    s_nop 0
; GFX950-NEXT:    buffer_load_dwordx4 v[0:1], s[0:3], s5 idxen offen offset:2048 lds
; GFX950-NEXT:    s_endpgm
  call void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 16, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
  ret void
}