; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | \
; RUN: FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main -passes=loop-unroll -unroll-remainder=false -S < %s | \
; RUN: llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops \
; RUN: -pass-remarks-analysis=hardware-loops %s -S -o - 2>&1 | \
; RUN: FileCheck %s --check-prefix=CHECK-REMARKS

; This file is exercised by three pipelines (see the invocations above):
;   1. opt with the hardware-loops pass; the IR output is verified with the
;      default FileCheck prefix.
;   2. opt with loop-unroll (remainder unrolling disabled) piped into llc; the
;      generated assembly is verified with the UNROLL prefix.
;   3. opt with the hardware-loops pass and its analysis remarks enabled;
;      stderr is merged into stdout and verified with the REMARKS prefix.
;
; The remark directives below are plain (ordered) matches, so they must appear
; in the tool output in exactly this sequence. They are not tied to a function
; name; the sequence as a whole depends on the order and loop structure of the
; functions in this file.

; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop


; early_exit: a loop with two exiting blocks. %do.body leaves to %do.end when
; the loaded element is greater than %max, and %if.end leaves when %inc is no
; longer below %n. The directives require that neither a set.loop.iterations
; nor a loop.decrement intrinsic shows up in the output for this function.
; CHECK-LABEL: early_exit
; CHECK-NOT: llvm.set.loop.iterations
; CHECK-NOT: llvm.loop.decrement
define i32 @early_exit(ptr nocapture readonly %a, i32 %max, i32 %n) {
entry:
  br label %do.body

do.body:
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.0
  %0 = load i32, ptr %arrayidx, align 4
  %cmp = icmp sgt i32 %0, %max
  ; First exit: data-dependent, taken straight from the loop header.
  br i1 %cmp, label %do.end, label %if.end

if.end:
  %inc = add nuw i32 %i.0, 1
  %cmp1 = icmp ult i32 %inc, %n
  ; Second exit: the counted exit through the latch.
  br i1 %cmp1, label %do.body, label %if.end.do.end_crit_edge

if.end.do.end_crit_edge:
  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, ptr %a, i32 %inc
  %.pre = load i32, ptr %arrayidx2.phi.trans.insert, align 4
  br label %do.end

do.end:
  %1 = phi i32 [ %.pre, %if.end.do.end_crit_edge ], [ %0, %do.body ]
  ret i32 %1
}

; nested: a two-level loop nest in which both loops count up to %N. The
; directives require that no start.loop.iterations call precedes the branch in
; the entry block, that one is created and fed (via a phi incoming from
; %while.cond1.preheader.us) into a decrement.reg in the inner loop
; %while.body3.us, and that no i1-returning llvm.loop.decrement appears
; afterwards.
; CHECK-LABEL: nested
; CHECK-NOT: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

; CHECK: [[START:%[^ ]+]] = call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: br label %while.body3.us

; CHECK: [[REM:%[^ ]+]] = phi i32 [ [[START]], %while.cond1.preheader.us ], [ [[LOOP_DEC:%[^ ]+]], %while.body3.us ]
; CHECK: [[LOOP_DEC]] = call i32 @llvm.loop.decrement.reg.i32(i32 [[REM]], i32 1)
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %while.body3.us, label %while.cond1.while.end_crit_edge.us

; CHECK-NOT: %{{[^ ]+}} = call i1 @llvm.loop.decrement.i32(i32 1)

define void @nested(ptr nocapture %A, i32 %N) {
entry:
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

; Outer loop header; also the block that enters the inner loop.
while.cond1.preheader.us:
  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %mul.us = mul i32 %i.021.us, %N
  br label %while.body3.us

; Inner loop: single block that is both header and latch.
while.body3.us:
  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
  %add.us = add i32 %j.019.us, %mul.us
  %arrayidx.us = getelementptr inbounds i32, ptr %A, i32 %add.us
  store i32 %add.us, ptr %arrayidx.us, align 4
  %inc.us = add nuw i32 %j.019.us, 1
  %exitcond = icmp eq i32 %inc.us, %N
  br i1 %exitcond, label %while.cond1.while.end_crit_edge.us, label %while.body3.us

; Outer loop latch.
while.cond1.while.end_crit_edge.us:
  %inc6.us = add nuw i32 %i.021.us, 1
  %exitcond23 = icmp eq i32 %inc6.us, %N
  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}

; pre_existing: the input loop is already expressed with
; llvm.start.loop.iterations and llvm.loop.decrement.reg. The directives
; require one start call, no second start call before the decrement.reg on %0,
; and no further decrement.reg call after it.
; CHECK-LABEL: pre_existing
; CHECK: llvm.start.loop.iterations
; CHECK-NOT: llvm.start.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing(i32 %n, ptr nocapture %p, ptr nocapture readonly %q) {
entry:
  %start = call i32 @llvm.start.loop.iterations.i32(i32 %n)
  br label %while.body

while.body: ; preds = %while.body, %entry
  %q.addr.05 = phi ptr [ %incdec.ptr, %while.body ], [ %q, %entry ]
  %p.addr.04 = phi ptr [ %incdec.ptr1, %while.body ], [ %p, %entry ]
  ; Remaining-iterations counter carried around the loop.
  %0 = phi i32 [ %start, %entry ], [ %2, %while.body ]
  %incdec.ptr = getelementptr inbounds i32, ptr %q.addr.05, i32 1
  %1 = load i32, ptr %q.addr.05, align 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %p.addr.04, i32 1
  store i32 %1, ptr %p.addr.04, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end: ; preds = %while.body
  ret i32 0
}

; pre_existing_test_set: like pre_existing, but the loop is guarded in the
; entry block by llvm.test.start.loop.iterations, whose i1 result decides
; whether the loop is entered and whose i32 result seeds the counter phi.
; The directives require that the guard call is kept, that nothing matching
; llvm.set<anything>.loop.iterations appears before the decrement.reg on %0,
; and that no further decrement.reg call follows.
; CHECK-LABEL: pre_existing_test_set
; CHECK: call { i32, i1 } @llvm.test.start.loop.iterations
; CHECK-NOT: llvm.set{{.*}}.loop.iterations
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK-NOT: call i32 @llvm.loop.decrement.reg
define i32 @pre_existing_test_set(i32 %n, ptr nocapture %p, ptr nocapture readonly %q) {
entry:
  %guard = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %n)
  %g0 = extractvalue { i32, i1 } %guard, 0
  %g1 = extractvalue { i32, i1 } %guard, 1
  br i1 %g1, label %while.preheader, label %while.end

while.preheader:
  br label %while.body

while.body: ; preds = %while.body, %while.preheader
  %q.addr.05 = phi ptr [ %incdec.ptr, %while.body ], [ %q, %while.preheader ]
  %p.addr.04 = phi ptr [ %incdec.ptr1, %while.body ], [ %p, %while.preheader ]
  %0 = phi i32 [ %g0, %while.preheader ], [ %2, %while.body ]
  %incdec.ptr = getelementptr inbounds i32, ptr %q.addr.05, i32 1
  %1 = load i32, ptr %q.addr.05, align 4
  %incdec.ptr1 = getelementptr inbounds i32, ptr %p.addr.04, i32 1
  store i32 %1, ptr %p.addr.04, align 4
  %2 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %while.body, label %while.end

while.end: ; preds = %while.body, %entry
  ret i32 0
}

; pre_existing_inner: same loop nest shape as @nested, but the inner loop
; already uses start.loop.iterations / loop.decrement.reg in the input while
; the outer loop is a plain counted loop. The directives require that no start
; call appears before %while.cond1.preheader.us, that the existing start and
; decrement.reg calls are present, and that no other llvm.loop.decrement call
; follows the inner loop's branch.
; CHECK-LABEL: pre_existing_inner
; CHECK-NOT: llvm.start.loop.iterations
; CHECK: while.cond1.preheader.us:
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
; CHECK: br i1
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @pre_existing_inner(ptr nocapture %A, i32 %N) {
entry:
  %cmp20 = icmp eq i32 %N, 0
  br i1 %cmp20, label %while.end7, label %while.cond1.preheader.us

while.cond1.preheader.us:
  %i.021.us = phi i32 [ %inc6.us, %while.cond1.while.end_crit_edge.us ], [ 0, %entry ]
  %mul.us = mul i32 %i.021.us, %N
  %start = call i32 @llvm.start.loop.iterations.i32(i32 %N)
  br label %while.body3.us

while.body3.us:
  %j.019.us = phi i32 [ 0, %while.cond1.preheader.us ], [ %inc.us, %while.body3.us ]
  %0 = phi i32 [ %start, %while.cond1.preheader.us ], [ %1, %while.body3.us ]
  %add.us = add i32 %j.019.us, %mul.us
  %arrayidx.us = getelementptr inbounds i32, ptr %A, i32 %add.us
  store i32 %add.us, ptr %arrayidx.us, align 4
  %inc.us = add nuw i32 %j.019.us, 1
  %1 = call i32 @llvm.loop.decrement.reg.i32(i32 %0, i32 1)
  %2 = icmp ne i32 %1, 0
  br i1 %2, label %while.body3.us, label %while.cond1.while.end_crit_edge.us

while.cond1.while.end_crit_edge.us:
  %inc6.us = add nuw i32 %i.021.us, 1
  %exitcond23 = icmp eq i32 %inc6.us, %N
  br i1 %exitcond23, label %while.end7, label %while.cond1.preheader.us

while.end7:
  ret void
}

; not_rotated: a two-level nest written with unnamed values (%0..%2 are the
; arguments, %3 is the implicit entry block). In both loops the exit test sits
; in the header (%4 and %9) and the latch (%18 and %12) ends in an
; unconditional branch. The directives require that no start.loop.iterations
; and no llvm.loop.decrement.i32 call is produced.
; CHECK-LABEL: not_rotated
; CHECK-NOT: call i32 @llvm.start.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement.i32
define void @not_rotated(i32, ptr nocapture, i16 signext) {
  br label %4

; Outer loop header with the outer exit test.
4:
  %5 = phi i32 [ 0, %3 ], [ %19, %18 ]
  %6 = icmp eq i32 %5, %0
  br i1 %6, label %20, label %7

7:
  %8 = mul i32 %5, %0
  br label %9

; Inner loop header with the inner exit test.
9:
  %10 = phi i32 [ %17, %12 ], [ 0, %7 ]
  %11 = icmp eq i32 %10, %0
  br i1 %11, label %18, label %12

; Inner loop body and latch.
12:
  %13 = add i32 %10, %8
  %14 = getelementptr inbounds i16, ptr %1, i32 %13
  %15 = load i16, ptr %14, align 2
  %16 = add i16 %15, %2
  store i16 %16, ptr %14, align 2
  %17 = add i32 %10, 1
  br label %9

; Outer loop latch.
18:
  %19 = add i32 %5, 1
  br label %4

20:
  ret void
}

; multi_latch: %header is reached by back edges from both %latch.0 and
; %latch.1. The directives require that no start.loop.iterations and no
; llvm.loop.decrement call is produced.
; NOTE(review): the false edge of %latch.1 targets %latch.1 itself, so %exit
; has no predecessors in this function. Changing that edge would change the
; loop structure seen by the pass and presumably the remark sequence expected
; at the top of the file - confirm before touching it.
; CHECK-LABEL: multi_latch
; CHECK-NOT: call i32 @llvm.start.loop.iterations
; CHECK-NOT: call i32 @llvm.loop.decrement
define void @multi_latch(ptr %a, ptr %b, i32 %N) {
entry:
  %half = lshr i32 %N, 1
  br label %header

header:
  %iv = phi i32 [ 0, %entry ], [ %count.next, %latch.0 ], [ %count.next, %latch.1 ]
  %cmp = icmp ult i32 %iv, %half
  %addr.a = getelementptr i32, ptr %a, i32 %iv
  %addr.b = getelementptr i32, ptr %b, i32 %iv
  br i1 %cmp, label %if.then, label %if.else

if.then:
  store i32 %iv, ptr %addr.a
  br label %latch.0

if.else:
  store i32 %iv, ptr %addr.b
  br label %latch.0

; First latch: back edge to %header while %count.next is below %half.
latch.0:
  %count.next = add nuw i32 %iv, 1
  %cmp.1 = icmp ult i32 %count.next, %half
  br i1 %cmp.1, label %header, label %latch.1

; Second latch: back edge to %header while %count.next is below %N.
latch.1:
  %ld = load i32, ptr %addr.a
  store i32 %ld, ptr %addr.b
  %cmp.2 = icmp ult i32 %count.next, %N
  br i1 %cmp.2, label %header, label %latch.1

exit:
  ret void
}

; search: a counted loop whose body dispatches on the loaded byte with a
; switch; all switch targets rejoin in %for.inc, which is the only latch and
; the only exiting block. The directives require a
; test.start.loop.iterations guard in the entry block whose i1 result selects
; between a new %for.body.preheader and %for.cond.cleanup, and a
; decrement.reg / icmp ne / branch sequence in %for.inc.
; CHECK-LABEL: search
; CHECK: entry:
; CHECK: [[TEST1:%[^ ]+]] = call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %N)
; CHECK: [[TEST:%[^ ]+]] = extractvalue { i32, i1 } [[TEST1]], 1
; CHECK: br i1 [[TEST]], label %for.body.preheader, label %for.cond.cleanup
; CHECK: for.body.preheader:
; CHECK: br label %for.body
; CHECK: for.body:
; CHECK: for.inc:
; CHECK: [[LOOP_DEC:%[^ ]+]] = call i32 @llvm.loop.decrement.reg.i32(
; CHECK: [[CMP:%[^ ]+]] = icmp ne i32 [[LOOP_DEC]], 0
; CHECK: br i1 [[CMP]], label %for.body, label %for.cond.cleanup
define i32 @search(ptr nocapture readonly %c, i32 %N) {
entry:
  %cmp11 = icmp eq i32 %N, 0
  br i1 %cmp11, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  %found.0.lcssa = phi i32 [ 0, %entry ], [ %found.1, %for.inc ]
  %spaces.0.lcssa = phi i32 [ 0, %entry ], [ %spaces.1, %for.inc ]
  %sub = sub nsw i32 %found.0.lcssa, %spaces.0.lcssa
  ret i32 %sub

for.body:
  %i.014 = phi i32 [ %inc3, %for.inc ], [ 0, %entry ]
  %spaces.013 = phi i32 [ %spaces.1, %for.inc ], [ 0, %entry ]
  %found.012 = phi i32 [ %found.1, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %c, i32 %i.014
  %0 = load i8, ptr %arrayidx, align 1
  ; Byte values 108, 111 and 112 bump %found; 32 bumps %spaces; anything else
  ; falls through to %for.inc unchanged.
  switch i8 %0, label %for.inc [
    i8 108, label %sw.bb
    i8 111, label %sw.bb
    i8 112, label %sw.bb
    i8 32, label %sw.bb1
  ]

sw.bb: ; preds = %for.body, %for.body, %for.body
  %inc = add nsw i32 %found.012, 1
  br label %for.inc

sw.bb1: ; preds = %for.body
  %inc2 = add nsw i32 %spaces.013, 1
  br label %for.inc

for.inc: ; preds = %sw.bb, %sw.bb1, %for.body
  %found.1 = phi i32 [ %found.012, %for.body ], [ %found.012, %sw.bb1 ], [ %inc, %sw.bb ]
  %spaces.1 = phi i32 [ %spaces.013, %for.body ], [ %inc2, %sw.bb1 ], [ %spaces.013, %sw.bb ]
  %inc3 = add nuw i32 %i.014, 1
  %exitcond = icmp eq i32 %inc3, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; unroll_inc_int: an up-counting single-block loop guarded by a signed
; "%N > 0" test in the entry block. For the hardware-loops pipeline the
; directives require a start.loop.iterations call on %N and a decrement.reg
; call. For the unroll + llc pipeline they require, in the assembly: no "dls"
; between the %for.body.preheader label and the %for.body label, an "le" back
; to the %for.body label, then a "wls"-guarded second loop closed by its own
; "le", immediately followed by the label the "wls" branches to.
; CHECK-LABEL: unroll_inc_int
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; CHECK-UNROLL-LABEL: unroll_inc_int
; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL: le lr, [[LOOP]]
; CHECK-UNROLL: wls lr, r12, [[EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[EPIL]]
; CHECK-UNROLL-NEXT: [[EXIT]]

define void @unroll_inc_int(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i32 %i.09
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %c, i32 %i.09
  %1 = load i32, ptr %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 %i.09
  store i32 %mul, ptr %arrayidx2, align 4
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; unroll_inc_unsigned: same loop body as unroll_inc_int, but the entry guard
; is "%N == 0" and the increment carries only nuw. For the hardware-loops
; pipeline the directives require a test.start.loop.iterations call on %N and
; a decrement.reg call. For the unroll + llc pipeline they require the same
; main-loop / "wls"-guarded second-loop shape, with the "wls" target label
; appearing before a "pop".
; CHECK-LABEL: unroll_inc_unsigned
; CHECK: call { i32, i1 } @llvm.test.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; CHECK-UNROLL-LABEL: unroll_inc_unsigned
; CHECK-UNROLL: [[PREHEADER:.LBB[0-9_]+]]: @ %for.body.preheader
; CHECK-UNROLL-NOT: dls
; CHECK-UNROLL: [[LOOP:.LBB[0-9_]+]]: @ %for.body
; CHECK-UNROLL: le lr, [[LOOP]]
; CHECK-UNROLL: wls lr, r12, [[EPIL_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL: [[EPIL:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[EPIL]]
; CHECK-UNROLL: [[EPIL_EXIT]]:
; CHECK-UNROLL: pop
define void @unroll_inc_unsigned(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp eq i32 %N, 0
  br i1 %cmp8, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i32 %i.09
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %c, i32 %i.09
  %1 = load i32, ptr %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 %i.09
  store i32 %mul, ptr %arrayidx2, align 4
  %inc = add nuw i32 %i.09, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; unroll_dec_int: a down-counting single-block loop. The induction variable
; starts at %N, is decremented by one each iteration, and the loop continues
; while the decremented value is signed-greater than zero. For the
; hardware-loops pipeline the directives require a start.loop.iterations call
; on %N and a decrement.reg call. For the unroll + llc pipeline they require a
; "wls"-guarded first loop closed by "le", directly followed by a label, then
; a second "le" loop, and no text matching "b" between that second "le" and
; the "pop".
; CHECK-LABEL: unroll_dec_int
; CHECK: call i32 @llvm.start.loop.iterations.i32(i32 %N)
; CHECK: call i32 @llvm.loop.decrement.reg.i32(

; CHECK-UNROLL-LABEL: unroll_dec_int:
; CHECK-UNROLL: wls lr, {{.*}}, [[PROLOGUE_EXIT:.LBB[0-9_]+]]
; CHECK-UNROLL-NEXT: [[PROLOGUE:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[PROLOGUE]]
; CHECK-UNROLL-NEXT: [[PROLOGUE_EXIT:.LBB[0-9_]+]]:
; CHECK-UNROLL: [[BODY:.LBB[0-9_]+]]:
; CHECK-UNROLL: le lr, [[BODY]]
; CHECK-UNROLL-NOT: b
; CHECK-UNROLL: pop
define void @unroll_dec_int(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture readonly %c, i32 %N) {
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  %i.09 = phi i32 [ %dec, %for.body ], [ %N, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %b, i32 %i.09
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %c, i32 %i.09
  %1 = load i32, ptr %arrayidx1, align 4
  %mul = mul nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, ptr %a, i32 %i.09
  store i32 %mul, ptr %arrayidx2, align 4
  %dec = add nsw i32 %i.09, -1
  %cmp = icmp sgt i32 %dec, 0
  br i1 %cmp, label %for.body, label %for.cond.cleanup
}

; Declarations of the hardware-loop intrinsics that the pre_existing* inputs
; call directly.
; NOTE(review): attribute group #0 is referenced here but its definition is
; not visible in this portion of the file - confirm it is intentional.
declare i32 @llvm.start.loop.iterations.i32(i32) #0
declare { i32, i1 } @llvm.test.start.loop.iterations.i32(i32) #0
declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #0
