; Loop-unroll test for ARM/Thumb targets.
;
; Each invocation below runs only the loop-unroll pass for one triple/CPU pair
; and verifies the result with exactly one check prefix:
;   * cortex-a57 / cortex-a72 invocations use the NOUNROLL prefix,
;   * cortex-m23 / cortex-m33 / cortex-m7 invocations use the UNROLL prefix.
;
; NOTE(review): every invocation passes a single --check-prefix, so the
; default-prefix LABEL directives further down do not appear to be enabled by
; any of them - confirm whether they are meant to be enforced.
; NOTE(review): attribute groups #0-#3 are referenced but not defined in the
; visible text of this file.

; RUN: opt -mtriple=armv7 -mcpu=cortex-a57 -passes=loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-NOUNROLL
; RUN: opt -mtriple=thumbv7 -mcpu=cortex-a57 -passes=loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-NOUNROLL
; RUN: opt -mtriple=thumbv7 -mcpu=cortex-a72 -passes=loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-NOUNROLL
; RUN: opt -mtriple=thumbv8m -mcpu=cortex-m23 -passes=loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL
; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -passes=loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL
; RUN: opt -mtriple=thumbv7em -mcpu=cortex-m7 -passes=loop-unroll -S %s -o - | FileCheck %s --check-prefix=CHECK-UNROLL

; @partial: single loop whose exit compare is against the constant 1024.
; The NOUNROLL checks expect the back-edge induction value to be IV0 + 2;
; the UNROLL checks expect IV0 + 16.
; CHECK-LABEL: partial
define arm_aapcs_vfpcc void @partial(ptr nocapture %C, ptr nocapture readonly %A, ptr nocapture readonly %B) local_unnamed_addr #0 {
entry:
  br label %for.body

; CHECK-LABEL: for.body
for.body:

; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
; CHECK-NOUNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-NOUNROLL: [[IV2]] = add nuw nsw i32 [[IV0]], 2
; CHECK-NOUNROLL: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV2]], 1024
; CHECK-NOUNROLL: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body

; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV16:%[a-z.0-9]+]], %for.body ]
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 2
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 3
; CHECK-UNROLL: [[IV4:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 4
; CHECK-UNROLL: [[IV5:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 5
; CHECK-UNROLL: [[IV6:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 6
; CHECK-UNROLL: [[IV7:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 7
; CHECK-UNROLL: [[IV8:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 8
; CHECK-UNROLL: [[IV9:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 9
; CHECK-UNROLL: [[IV10:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 10
; CHECK-UNROLL: [[IV11:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 11
; CHECK-UNROLL: [[IV12:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 12
; CHECK-UNROLL: [[IV13:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 13
; CHECK-UNROLL: [[IV14:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 14
; CHECK-UNROLL: [[IV15:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 15
; CHECK-UNROLL: [[IV16]] = add nuw nsw i32 [[IV0]], 16
; CHECK-UNROLL: [[CMP:%[a-z.0-9]+]] = icmp eq i32 [[IV16]], 1024
; CHECK-UNROLL: br i1 [[CMP]], label [[END:%[a-z.]+]], label %for.body

  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.08
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.08
  %1 = load i32, ptr %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.08
  store i32 %mul, ptr %arrayidx2, align 4
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; @runtime: single loop whose exit compare is against the argument %N.
; The NOUNROLL checks expect the back-edge induction value to be IV0 + 2;
; the UNROLL checks expect IV0 + 4 followed by three epilogue blocks.
; CHECK-LABEL: runtime
define arm_aapcs_vfpcc void @runtime(ptr nocapture %C, ptr nocapture readonly %A, ptr nocapture readonly %B, i32 %N) local_unnamed_addr #0 {
entry:
  %cmp8 = icmp eq i32 %N, 0
  br i1 %cmp8, label %for.cond.cleanup, label %for.body

; CHECK-LABEL: for.body
for.body:
; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV2:%[a-z.0-9]+]], %for.body ]
; CHECK-NOUNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-NOUNROLL: [[IV2]] = add nuw i32 [[IV0]], 2
; CHECK-NOUNROLL: br

; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z.0-9]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body ]
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 2
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 3
; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV0]], 4
; CHECK-UNROLL: br

; CHECK-UNROLL: for.body.epil:
; CHECK-UNROLL: for.body.epil.1:
; CHECK-UNROLL: for.body.epil.2:

  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.09
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.09
  %1 = load i32, ptr %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.09
  store i32 %mul, ptr %arrayidx2, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; @nested_runtime: two-level loop nest; both exit compares are against %N.
; For the inner loop (for.body4) the NOUNROLL checks expect a step of 1,
; while the UNROLL checks expect three epilogue blocks and a step of 4.
; CHECK-LABEL: nested_runtime
define arm_aapcs_vfpcc void @nested_runtime(ptr nocapture %C, ptr nocapture readonly %A, ptr nocapture readonly %B, i32 %N) local_unnamed_addr #0 {
entry:
  %cmp25 = icmp eq i32 %N, 0
  br i1 %cmp25, label %for.cond.cleanup, label %for.body4.lr.ph

for.body4.lr.ph:
  %h.026 = phi i32 [ %inc11, %for.cond.cleanup3 ], [ 0, %entry ]
  %mul = mul i32 %h.026, %N
  br label %for.body4

for.cond.cleanup:
  ret void

for.cond.cleanup3:
  %inc11 = add nuw i32 %h.026, 1
  %exitcond27 = icmp eq i32 %inc11, %N
  br i1 %exitcond27, label %for.cond.cleanup, label %for.body4.lr.ph

; CHECK-LABEL: for.body4
for.body4:
; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV1:%[a-z.0-9]+]], %for.body4 ]
; CHECK-NOUNROLL: [[IV1]] = add nuw i32 [[IV0]], 1
; CHECK-NOUNROLL: br

; CHECK-UNROLL: for.body4.epil:
; CHECK-UNROLL: for.body4.epil.1:
; CHECK-UNROLL: for.body4.epil.2:
; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, [[PRE:%[a-z0-9.]+]] ], [ [[IV4:%[a-z.0-9]+]], %for.body4 ]
; CHECK-UNROLL: [[IV1:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 1
; CHECK-UNROLL: [[IV2:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 2
; CHECK-UNROLL: [[IV3:%[a-z.0-9]+]] = add nuw nsw i32 [[IV0]], 3
; CHECK-UNROLL: [[IV4]] = add nuw i32 [[IV0]], 4
; CHECK-UNROLL: br

  %w.024 = phi i32 [ 0, %for.body4.lr.ph ], [ %inc, %for.body4 ]
  %add = add i32 %w.024, %mul
  %arrayidx = getelementptr inbounds i16, ptr %A, i32 %add
  %0 = load i16, ptr %arrayidx, align 2
  %conv = sext i16 %0 to i32
  %arrayidx5 = getelementptr inbounds i16, ptr %B, i32 %w.024
  %1 = load i16, ptr %arrayidx5, align 2
  %conv6 = sext i16 %1 to i32
  %mul7 = mul nsw i32 %conv6, %conv
  %arrayidx8 = getelementptr inbounds i32, ptr %C, i32 %w.024
  %2 = load i32, ptr %arrayidx8, align 4
  %add9 = add nsw i32 %mul7, %2
  store i32 %add9, ptr %arrayidx8, align 4
  %inc = add nuw i32 %w.024, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup3, label %for.body4
}

; @loop_call: constant-bound loop (1024) whose body calls @some_func.
; Both prefixes expect the loop to keep a step of 1.
; CHECK-LABEL: loop_call
define arm_aapcs_vfpcc void @loop_call(ptr nocapture %C, ptr nocapture readonly %A, ptr nocapture readonly %B) local_unnamed_addr #1 {
entry:
  br label %for.body

for.cond.cleanup:
  ret void

; CHECK-LABEL: for.body
for.body:
; CHECK-NOUNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
; CHECK-NOUNROLL: [[IV1]] = add nuw nsw i32 [[IV0]], 1
; CHECK-NOUNROLL: icmp eq i32 [[IV1]], 1024
; CHECK-NOUNROLL: br

; CHECK-UNROLL: [[IV0:%[a-z.0-9]+]] = phi i32 [ 0, %entry ], [ [[IV1:%[a-z.0-9]+]], %for.body ]
; CHECK-UNROLL: [[IV1]] = add nuw nsw i32 [[IV0]], 1
; CHECK-UNROLL: icmp eq i32 [[IV1]], 1024
; CHECK-UNROLL: br

  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.08
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %B, i32 %i.08
  %1 = load i32, ptr %arrayidx1, align 4
  %call = tail call arm_aapcs_vfpcc i32 @some_func(i32 %0, i32 %1) #3
  %arrayidx2 = getelementptr inbounds i32, ptr %C, i32 %i.08
  store i32 %call, ptr %arrayidx2, align 4
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; @iterate_inc: loop that increments the i32 field of a %struct.Node and then
; follows the pointer loaded from the node until that pointer is null.
; The NOUNROLL checks expect the original phi and a single null test;
; the UNROLL checks expect five null-compare/branch pairs sharing one exit label.
; CHECK-LABEL: iterate_inc
; CHECK-NOUNROLL: %n.addr.04 = phi ptr [ %1, %while.body ], [ %n, %while.body.preheader ]
; CHECK-NOUNROLL: %tobool = icmp eq ptr %1, null
; CHECK-NOUNROLL: br i1 %tobool
; CHECK-NOUNROLL-NOT: load

; CHECK-UNROLL: [[CMP0:%[a-z.0-9]+]] = icmp eq ptr [[VAR0:%[a-z.0-9]+]], null
; CHECK-UNROLL: br i1 [[CMP0]], label [[END:%[a-z.0-9]+]]
; CHECK-UNROLL: [[CMP1:%[a-z.0-9]+]] = icmp eq ptr [[VAR1:%[a-z.0-9]+]], null
; CHECK-UNROLL: br i1 [[CMP1]], label [[END]]
; CHECK-UNROLL: [[CMP2:%[a-z.0-9]+]] = icmp eq ptr [[VAR2:%[a-z.0-9]+]], null
; CHECK-UNROLL: br i1 [[CMP2]], label [[END]]
; CHECK-UNROLL: [[CMP3:%[a-z.0-9]+]] = icmp eq ptr [[VAR3:%[a-z.0-9]+]], null
; CHECK-UNROLL: br i1 [[CMP3]], label [[END]]
; CHECK-UNROLL: [[CMP4:%[a-z.0-9]+]] = icmp eq ptr [[VAR4:%[a-z.0-9]+]], null
; CHECK-UNROLL: br i1 [[CMP4]], label [[END]]
; CHECK-UNROLL-NOT: load

%struct.Node = type { ptr, i32 }

define arm_aapcscc void @iterate_inc(ptr %n) local_unnamed_addr #0 {
entry:
  %tobool3 = icmp eq ptr %n, null
  br i1 %tobool3, label %while.end, label %while.body.preheader

while.body.preheader:
  br label %while.body

while.body:
  %n.addr.04 = phi ptr [ %1, %while.body ], [ %n, %while.body.preheader ]
  %val = getelementptr inbounds %struct.Node, ptr %n.addr.04, i32 0, i32 1
  %0 = load i32, ptr %val, align 4
  %add = add nsw i32 %0, 1
  store i32 %add, ptr %val, align 4
  %1 = load ptr, ptr %n.addr.04, align 4
  %tobool = icmp eq ptr %1, null
  br i1 %tobool, label %while.end, label %while.body

while.end:
  ret void
}

; External callee used by @loop_call.
declare arm_aapcs_vfpcc i32 @some_func(i32, i32) local_unnamed_addr #2