; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s

; Disable optimizations in case optimizations are added later that
; specialize away generic pointer accesses.


; CHECK-LABEL: {{^}}branch_use_flat_i32:
; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, [M0, FLAT_SCRATCH]
; CHECK: s_endpgm
define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 {
entry:
  %cmp = icmp ne i32 %c, 0
  br i1 %cmp, label %local, label %global

local:
  %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)*
  br label %end

global:
  %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  br label %end

end:
  %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ]
  store i32 %x, i32 addrspace(4)* %fptr, align 4
;  %val = load i32 addrspace(4)* %fptr, align 4
;  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}


; These testcases may become useless once there are optimizations that
; remove generic pointers.

; CHECK-LABEL: {{^}}store_flat_i32:
; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}}
; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}}
; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
  store i32 %x, i32 addrspace(4)* %fptr, align 4
  ret void
}

; CHECK-LABEL: {{^}}store_flat_i64:
; CHECK: flat_store_dwordx2
define void @store_flat_i64(i64 addrspace(1)* %gptr, i64 %x) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  store i64 %x, i64 addrspace(4)* %fptr, align 8
  ret void
}

; CHECK-LABEL: {{^}}store_flat_v4i32:
; CHECK: flat_store_dwordx4
define void @store_flat_v4i32(<4 x i32> addrspace(1)* %gptr, <4 x i32> %x) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  store <4 x i32> %x, <4 x i32> addrspace(4)* %fptr, align 16
  ret void
}

; CHECK-LABEL: {{^}}store_flat_trunc_i16:
; CHECK: flat_store_short
define void @store_flat_trunc_i16(i16 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %y = trunc i32 %x to i16
  store i16 %y, i16 addrspace(4)* %fptr, align 2
  ret void
}

; CHECK-LABEL: {{^}}store_flat_trunc_i8:
; CHECK: flat_store_byte
define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %y = trunc i32 %x to i8
  store i8 %y, i8 addrspace(4)* %fptr, align 2
  ret void
}


; CHECK-LABEL: {{^}}load_flat_i32:
; CHECK: flat_load_dword
define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
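  ; Load through the generic pointer; this should select flat_load_dword.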
  %fload = load i32 addrspace(4)* %fptr, align 4
  store i32 %fload, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: {{^}}load_flat_i64:
; CHECK: flat_load_dwordx2
define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)*
  %fload = load i64 addrspace(4)* %fptr, align 4
  store i64 %fload, i64 addrspace(1)* %out, align 8
  ret void
}

; CHECK-LABEL: {{^}}load_flat_v4i32:
; CHECK: flat_load_dwordx4
define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)*
  %fload = load <4 x i32> addrspace(4)* %fptr, align 4
  store <4 x i32> %fload, <4 x i32> addrspace(1)* %out, align 8
  ret void
}

; CHECK-LABEL: {{^}}sextload_flat_i8:
; CHECK: flat_load_sbyte
define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load i8 addrspace(4)* %fptr, align 4
  %ext = sext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: {{^}}zextload_flat_i8:
; CHECK: flat_load_ubyte
define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)*
  %fload = load i8 addrspace(4)* %fptr, align 4
  %ext = zext i8 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: {{^}}sextload_flat_i16:
; CHECK: flat_load_sshort
define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load i16 addrspace(4)* %fptr, align 4
  %ext = sext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; CHECK-LABEL: {{^}}zextload_flat_i16:
; CHECK: flat_load_ushort
define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 {
  %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)*
  %fload = load i16 addrspace(4)* %fptr, align 4
  %ext = zext i16 %fload to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}


; TODO: This should not be zero when registers are used for small
; scratch allocations again.

; Check for prologue initializing special SGPRs pointing to scratch.
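; flat_scratch_lo/flat_scratch_hi form the FLAT_SCRATCH register pair that
; gives flat instructions the base of the scratch (private) aperture, so the
; prologue must initialize it before any flat access can reach a scratch
; address.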
; CHECK-LABEL: {{^}}store_flat_scratch:
; CHECK: s_movk_i32 flat_scratch_lo, 0
; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}}
; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}}
; CHECK: flat_store_dword
; CHECK: s_barrier
; CHECK: flat_load_dword
define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 {
  %alloca = alloca i32, i32 9, align 4
  %x = call i32 @llvm.r600.read.tidig.x() #3
  %pptr = getelementptr i32* %alloca, i32 %x
  %fptr = addrspacecast i32* %pptr to i32 addrspace(4)*
  store i32 %x, i32 addrspace(4)* %fptr
  ; Dummy call
  call void @llvm.AMDGPU.barrier.local() #1
  %reload = load i32 addrspace(4)* %fptr, align 4
  store i32 %reload, i32 addrspace(1)* %out, align 4
  ret void
}

declare void @llvm.AMDGPU.barrier.local() #1
declare i32 @llvm.r600.read.tidig.x() #3

attributes #0 = { nounwind }
attributes #1 = { nounwind noduplicate }
attributes #3 = { nounwind readnone }