; Test: AMDGPU loop-unroll heuristics around private (addrspace(5)) allocas.
; The AMDGPU target boosts the unroll threshold for loops that index a small
; private alloca with a loop-varying index, so the alloca can later be
; promoted/eliminated; it must NOT do so for oversized or dynamically-sized
; allocas. Each kernel below exercises one of these cases.
; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -passes=loop-unroll -S %s | FileCheck %s

; Check that we full unroll loop to be able to eliminate alloca
; (the store index %rem below depends on the induction variable %i.015,
; so only full unrolling makes every access to %arr a constant index)
; CHECK-LABEL: @non_invariant_ind
; CHECK: for.body:
; CHECK-NOT: br
; CHECK: store i32 %tmp15, ptr addrspace(1) %arrayidx7, align 4
; CHECK: ret void

define amdgpu_kernel void @non_invariant_ind(ptr addrspace(1) nocapture %a, i32 %x) {
entry:
  ; 64 x i32 = 256-byte private array, small enough to justify full unroll
  %arr = alloca [64 x i32], align 4, addrspace(5)
  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %arrayidx5 = getelementptr inbounds [64 x i32], ptr addrspace(5) %arr, i32 0, i32 %x
  %tmp15 = load i32, ptr addrspace(5) %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %tmp1
  store i32 %tmp15, ptr addrspace(1) %arrayidx7, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %idxprom = sext i32 %i.015 to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idxprom
  %tmp16 = load i32, ptr addrspace(1) %arrayidx, align 4
  ; index into %arr varies with the induction variable (non-invariant)
  %add = add nsw i32 %i.015, %tmp1
  %rem = srem i32 %add, 64
  %arrayidx3 = getelementptr inbounds [64 x i32], ptr addrspace(5) %arr, i32 0, i32 %rem
  store i32 %tmp16, ptr addrspace(5) %arrayidx3, align 4
  %inc = add nuw nsw i32 %i.015, 1
  %exitcond = icmp eq i32 %inc, 100
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check that we unroll inner loop but not outer
; (only the inner loop's index into %arr depends on its own induction
; variable; the outer trip-count-32 latch must survive, the inner
; trip-count-100 latch must be unrolled away)
; CHECK-LABEL: @invariant_ind
; CHECK: %[[exitcond:[^ ]+]] = icmp eq i32 %{{.*}}, 32
; CHECK: br i1 %[[exitcond]]
; CHECK-NOT: icmp eq i32 %{{.*}}, 100

define amdgpu_kernel void @invariant_ind(ptr addrspace(1) nocapture %a, i32 %x) {
entry:
  %arr = alloca [64 x i32], align 4, addrspace(5)
  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  br label %for.cond2.preheader

for.cond2.preheader:                              ; preds = %for.cond.cleanup5, %entry
  %i.026 = phi i32 [ 0, %entry ], [ %inc10, %for.cond.cleanup5 ]
  %idxprom = sext i32 %i.026 to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idxprom
  %tmp15 = load i32, ptr addrspace(1) %arrayidx, align 4
  br label %for.body6

for.cond.cleanup:                                 ; preds = %for.cond.cleanup5
  %arrayidx13 = getelementptr inbounds [64 x i32], ptr addrspace(5) %arr, i32 0, i32 %x
  %tmp16 = load i32, ptr addrspace(5) %arrayidx13, align 4
  %arrayidx15 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %tmp1
  store i32 %tmp16, ptr addrspace(1) %arrayidx15, align 4
  ret void

for.cond.cleanup5:                                ; preds = %for.body6
  ; outer-loop latch, trip count 32 — must remain after unrolling
  %inc10 = add nuw nsw i32 %i.026, 1
  %exitcond27 = icmp eq i32 %inc10, 32
  br i1 %exitcond27, label %for.cond.cleanup, label %for.cond2.preheader

for.body6:                                        ; preds = %for.body6, %for.cond2.preheader
  ; inner loop: %rem depends on inner induction variable %j.025,
  ; so this trip-count-100 loop is fully unrolled
  %j.025 = phi i32 [ 0, %for.cond2.preheader ], [ %inc, %for.body6 ]
  %add = add nsw i32 %j.025, %tmp1
  %rem = srem i32 %add, 64
  %arrayidx8 = getelementptr inbounds [64 x i32], ptr addrspace(5) %arr, i32 0, i32 %rem
  store i32 %tmp15, ptr addrspace(5) %arrayidx8, align 4
  %inc = add nuw nsw i32 %j.025, 1
  %exitcond = icmp eq i32 %inc, 100
  br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
}

; Check we do not enforce unroll if alloca is too big
; (same loop shape as @non_invariant_ind, but the alloca is
; 256 x i32 = 1024 bytes, so the loop latch must survive)
; CHECK-LABEL: @too_big
; CHECK: for.body:
; CHECK: icmp eq i32 %{{.*}}, 100
; CHECK: br

define amdgpu_kernel void @too_big(ptr addrspace(1) nocapture %a, i32 %x) {
entry:
  %arr = alloca [256 x i32], align 4, addrspace(5)
  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %arrayidx5 = getelementptr inbounds [256 x i32], ptr addrspace(5) %arr, i32 0, i32 %x
  %tmp15 = load i32, ptr addrspace(5) %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %tmp1
  store i32 %tmp15, ptr addrspace(1) %arrayidx7, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %idxprom = sext i32 %i.015 to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idxprom
  %tmp16 = load i32, ptr addrspace(1) %arrayidx, align 4
  %add = add nsw i32 %i.015, %tmp1
  %rem = srem i32 %add, 64
  %arrayidx3 = getelementptr inbounds [256 x i32], ptr addrspace(5) %arr, i32 0, i32 %rem
  store i32 %tmp16, ptr addrspace(5) %arrayidx3, align 4
  %inc = add nuw nsw i32 %i.015, 1
  %exitcond = icmp eq i32 %inc, 100
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Check we do not enforce unroll if alloca is dynamic
; (the element count %n is a kernel argument, unknown at compile time,
; so the alloca must stay and the loop latch must survive)
; CHECK-LABEL: @dynamic_size_alloca(
; CHECK: alloca i32, i32 %n
; CHECK: for.body:
; CHECK: icmp eq i32 %{{.*}}, 100
; CHECK: br

define amdgpu_kernel void @dynamic_size_alloca(ptr addrspace(1) nocapture %a, i32 %n, i32 %x) {
entry:
  %arr = alloca i32, i32 %n, align 4, addrspace(5)
  %tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %arrayidx5 = getelementptr inbounds i32, ptr addrspace(5) %arr, i32 %x
  %tmp15 = load i32, ptr addrspace(5) %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 %tmp1
  store i32 %tmp15, ptr addrspace(1) %arrayidx7, align 4
  ret void

for.body:                                         ; preds = %for.body, %entry
  %i.015 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %idxprom = sext i32 %i.015 to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idxprom
  %tmp16 = load i32, ptr addrspace(1) %arrayidx, align 4
  %add = add nsw i32 %i.015, %tmp1
  %rem = srem i32 %add, 64
  %arrayidx3 = getelementptr inbounds i32, ptr addrspace(5) %arr, i32 %rem
  store i32 %tmp16, ptr addrspace(5) %arrayidx3, align 4
  %inc = add nuw nsw i32 %i.015, 1
  %exitcond = icmp eq i32 %inc, 100
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Intrinsic declarations (only workitem.id.x is used above; the others are
; kept as in the original test)
declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1

declare i32 @llvm.amdgcn.workitem.id.x() #1

declare i32 @llvm.amdgcn.workgroup.id.x() #1

declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1

attributes #1 = { nounwind readnone }