; RUN: llc -mtriple=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SICIVI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=+max-private-element-size-16 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Test addressing modes when the scratch base is not a frame index.

; GCN-LABEL: {{^}}store_private_offset_i8:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_i8() #0 {
  store volatile i8 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i16:
; GCN: buffer_store_short v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_i16() #0 {
  store volatile i16 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i32:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_i32() #0 {
  store volatile i32 5, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_v2i32:
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_v2i32() #0 {
  store volatile <2 x i32> <i32 5, i32 10>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_v4i32:
; GCN: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @store_private_offset_v4i32() #0 {
  store volatile <4 x i32> <i32 5, i32 10, i32 15, i32 0>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_i8() #0 {
  %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}sextload_private_offset_i8:
; GCN: buffer_load_sbyte v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @sextload_private_offset_i8(ptr addrspace(1) %out) #0 {
  %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  %sextload = sext i8 %load to i32
  store i32 %sextload, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}zextload_private_offset_i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @zextload_private_offset_i8(ptr addrspace(1) %out) #0 {
  %load = load volatile i8, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  %zextload = zext i8 %load to i32
  store i32 %zextload, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_i16() #0 {
  %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}sextload_private_offset_i16:
; GCN: buffer_load_sshort v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @sextload_private_offset_i16(ptr addrspace(1) %out) #0 {
  %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  %sextload = sext i16 %load to i32
  store i32 %sextload, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}zextload_private_offset_i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @zextload_private_offset_i16(ptr addrspace(1) %out) #0 {
  %load = load volatile i16, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  %zextload = zext i16 %load to i32
  store i32 %zextload, ptr addrspace(1) undef
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_i32:
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_i32() #0 {
  %load = load volatile i32, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_v2i32:
; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_v2i32() #0 {
  %load = load volatile <2 x i32>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}load_private_offset_v4i32:
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s[12:15], 0 offset:8
define amdgpu_kernel void @load_private_offset_v4i32() #0 {
  %load = load volatile <4 x i32>, ptr addrspace(5) inttoptr (i32 8 to ptr addrspace(5))
  ret void
}

; The MUBUF immediate offset field is 12 bits (unsigned), so 4095 is the
; largest offset that can be folded into the instruction; anything larger
; must be placed in a VGPR and addressed with offen.

; GCN-LABEL: {{^}}store_private_offset_i8_max_offset:
; GCN: buffer_store_byte v{{[0-9]+}}, off, s[12:15], 0 offset:4095
define amdgpu_kernel void @store_private_offset_i8_max_offset() #0 {
  store volatile i8 5, ptr addrspace(5) inttoptr (i32 4095 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus1:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[12:15], 0 offen{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus1() #0 {
  store volatile i8 5, ptr addrspace(5) inttoptr (i32 4096 to ptr addrspace(5))
  ret void
}

; GCN-LABEL: {{^}}store_private_offset_i8_max_offset_plus2:
; GCN: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1000
; GCN: buffer_store_byte v{{[0-9]+}}, [[OFFSET]], s[12:15], 0 offen offset:1{{$}}
define amdgpu_kernel void @store_private_offset_i8_max_offset_plus2() #0 {
  store volatile i8 5, ptr addrspace(5) inttoptr (i32 4097 to ptr addrspace(5))
  ret void
}

; MUBUF used for stack access has bounds checking enabled before gfx9,
; so a possibly negative base index can't be used for the vgpr offset.

; GCN-LABEL: {{^}}store_private_unknown_bits_vaddr:
; SICIVI: {{buffer|flat}}_load_dword [[VADDR:v[0-9]+]],
; SICIVI: v_lshlrev_b32_e32 [[ADDR:v[0-9]+]], 2, [[VADDR]]
; SICIVI-NOT: [[ADDR]]
; SICIVI: v_add_{{i|u}}32_e32 [[ADDR1:v[0-9]+]], vcc, 32, [[ADDR]]
; SICIVI: buffer_store_dword v{{[0-9]+}}, [[ADDR1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}

; GFX9: global_load_dword [[VADDR:v[0-9]+]],
; GFX9: v_lshlrev_b32_e32 [[ADDR:v[0-9]+]], 2, [[VADDR]]
; GFX9-NOT: [[ADDR]]
; GFX9: buffer_store_dword v{{[0-9]+}}, [[ADDR]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen offset:32
define amdgpu_kernel void @store_private_unknown_bits_vaddr() #0 {
  %alloca = alloca [16 x i32], align 4, addrspace(5)
  %vaddr = load volatile i32, ptr addrspace(1) undef
  %vaddr.off = add i32 %vaddr, 8
  %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %vaddr.off
  store volatile i32 9, ptr addrspace(5) %gep
  ret void
}

attributes #0 = { nounwind }