; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -passes=amdgpu-promote-alloca < %s | FileCheck --enable-var-scope %s

; Exercises the amdgpu-promote-alloca pass on addrspace(5) allocas whose users
; are memory intrinsics (memcpy / memmove / memset) or llvm.objectsize.
; The expected output replaces the alloca with a pointer into an addrspace(3)
; global named "<kernel>.alloca" and re-mangles each intrinsic call for the new
; pointer address space (p5 -> p3).
;
; NOTE: the pass derives the global's name from the kernel and alloca names, and
; the expected-output patterns reference %in, %out, %arrayidx1 and %arrayidx2
; literally, so do not rename values in this file.

declare void @llvm.memcpy.p5.p1.i32(ptr addrspace(5) nocapture, ptr addrspace(1) nocapture, i32, i1) #0
declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture, ptr addrspace(5) nocapture, i32, i1) #0
declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture, i64, i1) #0

declare void @llvm.memmove.p5.p1.i32(ptr addrspace(5) nocapture, ptr addrspace(1) nocapture, i32, i1) #0
declare void @llvm.memmove.p1.p5.i32(ptr addrspace(1) nocapture, ptr addrspace(5) nocapture, i32, i1) #0
declare void @llvm.memmove.p5.p5.i64(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture, i64, i1) #0

declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture, i8, i32, i1) #0

declare i32 @llvm.objectsize.i32.p5(ptr addrspace(5), i1, i1, i1) #1

; memcpy into and out of a [17 x i32] alloca (68 bytes). Both calls are expected
; to be rewritten to use the same addrspace(3) GEP. The outer dimension of 64 in
; the expected global type matches the "64,64" flat work-group size in
; attributes #0 (presumably one slot per work-item).
; CHECK-LABEL: @promote_with_memcpy(
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 [[GEP]], ptr addrspace(1) align 4 %in, i32 68, i1 false)
; CHECK: call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 [[GEP]], i32 68, i1 false)
define amdgpu_kernel void @promote_with_memcpy(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %alloca = alloca [17 x i32], align 4, addrspace(5)
  call void @llvm.memcpy.p5.p1.i32(ptr addrspace(5) align 4 %alloca, ptr addrspace(1) align 4 %in, i32 68, i1 false)
  call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 %out, ptr addrspace(5) align 4 %alloca, i32 68, i1 false)
  ret void
}

; Same shape as the memcpy test, using memmove instead.
; CHECK-LABEL: @promote_with_memmove(
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memmove.p3.p1.i32(ptr addrspace(3) align 4 [[GEP]], ptr addrspace(1) align 4 %in, i32 68, i1 false)
; CHECK: call void @llvm.memmove.p1.p3.i32(ptr addrspace(1) align 4 %out, ptr addrspace(3) align 4 [[GEP]], i32 68, i1 false)
define amdgpu_kernel void @promote_with_memmove(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %alloca = alloca [17 x i32], align 4, addrspace(5)
  call void @llvm.memmove.p5.p1.i32(ptr addrspace(5) align 4 %alloca, ptr addrspace(1) align 4 %in, i32 68, i1 false)
  call void @llvm.memmove.p1.p5.i32(ptr addrspace(1) align 4 %out, ptr addrspace(5) align 4 %alloca, i32 68, i1 false)
  ret void
}

; memset of the whole 68-byte alloca with the byte value 7; the call is expected
; to be rewritten to the p3 variant with the fill value and length unchanged.
; The %out and %in arguments are unused by this kernel.
; CHECK-LABEL: @promote_with_memset(
; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memset.p3.i32(ptr addrspace(3) align 4 [[GEP]], i8 7, i32 68, i1 false)
define amdgpu_kernel void @promote_with_memset(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %alloca = alloca [17 x i32], align 4, addrspace(5)
  call void @llvm.memset.p5.i32(ptr addrspace(5) align 4 %alloca, i8 7, i32 68, i1 false)
  ret void
}

; llvm.objectsize on a [17 x i32] alloca: the call is expected to survive and be
; retargeted at the addrspace(3) pointer with its three i1 flags unchanged.
; CHECK-LABEL: @promote_with_objectsize(
; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call i32 @llvm.objectsize.i32.p3(ptr addrspace(3) [[PTR]], i1 false, i1 false, i1 false)
define amdgpu_kernel void @promote_with_objectsize(ptr addrspace(1) %out) #0 {
  %alloca = alloca [17 x i32], align 4, addrspace(5)
  %size = call i32 @llvm.objectsize.i32.p5(ptr addrspace(5) %alloca, i1 false, i1 false, i1 false)
  store i32 %size, ptr addrspace(1) %out
  ret void
}

; llvm.objectsize on a smaller [8 x i32] alloca: here the expected output stores
; the constant 32 (8 x 4 bytes) directly instead of keeping the intrinsic call.
; NOTE(review): presumably this alloca takes a different promotion path than the
; 17-element case above -- confirm against the pass before relying on that.
; CHECK-LABEL: @promote_with_objectsize_8(
; CHECK: store i32 32, ptr addrspace(1) %out, align 4
define amdgpu_kernel void @promote_with_objectsize_8(ptr addrspace(1) %out) #0 {
  %alloca = alloca [8 x i32], align 4, addrspace(5)
  %size = call i32 @llvm.objectsize.i32.p5(ptr addrspace(5) %alloca, i1 false, i1 false, i1 false)
  store i32 %size, ptr addrspace(1) %out
  ret void
}

; Both the destination and the source of a single memcpy are GEPs off the same
; alloca. The expected output is one p3.p3 call in which both operands have been
; moved to addrspace(3). This kernel carries no attribute group.
; CHECK-LABEL: @promote_alloca_used_twice_in_memcpy(
; CHECK: call void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) align 8 dereferenceable(16) %arrayidx1, ptr addrspace(3) align 8 dereferenceable(16) %arrayidx2, i64 16, i1 false)
define amdgpu_kernel void @promote_alloca_used_twice_in_memcpy(i32 %c) {
entry:
  %r = alloca double, align 8, addrspace(5)
  %arrayidx1 = getelementptr inbounds double, ptr addrspace(5) %r, i32 1
  %arrayidx2 = getelementptr inbounds double, ptr addrspace(5) %r, i32 %c
  call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 dereferenceable(16) %arrayidx1, ptr addrspace(5) align 8 dereferenceable(16) %arrayidx2, i64 16, i1 false)
  ret void
}

; Same as the previous test, with memmove in place of memcpy.
; CHECK-LABEL: @promote_alloca_used_twice_in_memmove(
; CHECK: call void @llvm.memmove.p3.p3.i64(ptr addrspace(3) align 8 dereferenceable(16) %arrayidx1, ptr addrspace(3) align 8 dereferenceable(16) %arrayidx2, i64 16, i1 false)
define amdgpu_kernel void @promote_alloca_used_twice_in_memmove(i32 %c) {
entry:
  %r = alloca double, align 8, addrspace(5)
  %arrayidx1 = getelementptr inbounds double, ptr addrspace(5) %r, i32 1
  %arrayidx2 = getelementptr inbounds double, ptr addrspace(5) %r, i32 %c
  call void @llvm.memmove.p5.p5.i64(ptr addrspace(5) align 8 dereferenceable(16) %arrayidx1, ptr addrspace(5) align 8 dereferenceable(16) %arrayidx2, i64 16, i1 false)
  ret void
}

attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" }
attributes #1 = { nounwind readnone }