; RUN: opt < %s -passes=loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
; RUN: opt < %s -passes=loop-unroll,loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s
; RUN: opt < %s -passes=loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s
;
; Run loop unrolling twice to verify that loop unrolling metadata is properly
; removed and further unrolling is disabled after the pass is run once.
;
; Check prefixes used in this file:
;   - the CHECK prefix is enabled in all three invocations above;
;   - the REM prefix is enabled only in the two invocations that do not pass
;     -unroll-allow-remainder=0;
;   - the NOREM prefix is enabled only in the -unroll-allow-remainder=0
;     invocation.
;
; Every function below has the same loop body: load a[i], add 1, store it
; back, and bump the induction variable. Only the trip count (a constant, or
; the runtime argument %b) and the !llvm.loop metadata differ, so the number
; of "store i32" lines in the output indicates how many copies of the body
; were emitted.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; loop4 contains a small loop which should be completely unrolled by
; the default unrolling heuristics. It serves as a control for the
; unroll(disable) pragma test loop4_with_disable.
;
; CHECK-LABEL: @loop4(
; CHECK-NOT: br i1
define void @loop4(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(disable)
; Same loop as loop4, but the pragma should keep it from being unrolled:
; exactly one store followed by the conditional branch must remain.
;
; CHECK-LABEL: @loop4_with_disable(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop4_with_disable(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1

for.end:                                          ; preds = %for.body
  ret void
}
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.disable"}

; loop64 has a high enough count that it should *not* be unrolled by
; the default unrolling heuristic. It serves as the control for the
; loop64_with_.* pragma tests below.
;
; CHECK-LABEL: @loop64(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; #pragma clang loop unroll(full)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_full(
; CHECK-NOT: br i1
define void @loop64_with_full(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3

for.end:                                          ; preds = %for.body
  ret void
}
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}

; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
; CHECK-LABEL: @loop64_with_count4(
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @loop64_with_count4(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5

for.end:                                          ; preds = %for.body
  ret void
}
!5 = !{!5, !6}
!6 = !{!"llvm.loop.unroll.count", i32 4}

; #pragma clang loop unroll(full)
; Full unrolling is requested, but loop has a runtime trip count so
; no unrolling should occur.
;
; CHECK-LABEL: @runtime_loop_with_full(
; CHECK: store i32
; CHECK-NOT: store i32
define void @runtime_loop_with_full(ptr nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely when %b <= 0.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !8

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!8 = !{!8, !4}

; #pragma clang loop unroll_count(4)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 4x unrolled) if remainder is allowed,
; otherwise loop should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count4(
; CHECK: for.body
; CHECK: store
; REM: store
; REM: store
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; REM: for.body.epil:
; REM: store
; NOREM-NOT: for.body.epil:
; NOREM-NOT: store
; CHECK-NOT: store
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_count4(ptr nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely when %b <= 0.
  %cmp3 = icmp sgt i32 %b, 0
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !9

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!9 = !{!9, !6}

; #pragma clang loop unroll_count(1)
; Loop should not be unrolled
;
; CHECK-LABEL: @unroll_1(
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
define void @unroll_1(ptr nocapture %a, i32 %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 4
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !10

for.end:                                          ; preds = %for.body
  ret void
}
!10 = !{!10, !11}
!11 = !{!"llvm.loop.unroll.count", i32 1}

; #pragma clang loop unroll(enable)
; Loop should be fully unrolled.
;
; CHECK-LABEL: @loop64_with_enable(
; CHECK-NOT: br i1
define void @loop64_with_enable(ptr nocapture %a) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 64
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !12

for.end:                                          ; preds = %for.body
  ret void
}
!12 = !{!12, !13}
!13 = !{!"llvm.loop.unroll.enable"}

; #pragma clang loop unroll(enable)
; Loop has a runtime trip count and should be runtime unrolled and duplicated
; (original and 8x) if remainder is allowed, otherwise it should not be
; unrolled.
;
; CHECK-LABEL: @runtime_loop_with_enable(
; CHECK: for.body:
; CHECK: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; REM: store i32
; CHECK-NOT: store i32
; CHECK: br i1
; REM: for.body.epil:
; NOREM-NOT: for.body.epil:
; REM: store
; CHECK-NOT: store
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_enable(ptr nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely when %b <= 0.
  %cmp3 = icmp sgt i32 %b, 0
  ; NOTE(review): this branch carries !8, the unroll.full loop ID declared
  ; after @runtime_loop_with_full, while the back-edge below carries !14.
  ; This branch is not a loop back-edge, so the attachment is presumably
  ; inert copy/paste residue -- confirm before changing it.
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !14

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!14 = !{!14, !13}

; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 3x unrolled) if remainder is allowed,
; otherwise it should not be unrolled.
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body
; CHECK: store
; REM: store
; REM: store
; CHECK-NOT: store
; CHECK: br i1
; REM: for.body.epil:
; REM: store
; NOREM-NOT: for.body.epil:
; NOREM-NOT: store
; CHECK-NOT: store
; REM: br i1
; NOREM-NOT: br i1
define void @runtime_loop_with_count3(ptr nocapture %a, i32 %b) {
entry:
  ; Guard: skip the loop entirely when %b <= 0.
  %cmp3 = icmp sgt i32 %b, 0
  ; NOTE(review): this branch carries !16, which is the unroll.count option
  ; node itself rather than the loop ID !15 used on the back-edge below.
  ; This branch is not a loop back-edge, so the attachment is presumably
  ; inert -- confirm before changing it.
  br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %inc = add nsw i32 %0, 1
  store i32 %inc, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %b
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15

for.end:                                          ; preds = %for.body, %entry
  ret void
}
!15 = !{!15, !16}
!16 = !{!"llvm.loop.unroll.count", i32 3}