1; RUN: opt -passes=loop-vectorize -force-vector-width=2 -S < %s 2>&1 | FileCheck %s 2; RUN: opt -passes=indvars -S < %s 2>&1 | FileCheck %s -check-prefix=INDVARCHECK 3 4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 5target triple = "x86_64-unknown-linux-gnu" 6 7; Produced from test-case: 8; 9; void testCountIncrLoop(unsigned char *ptr, int lim, int count, int val) 10; { 11; int inx = 0; 12; for (int outer_i = 0; outer_i < lim; ++outer_i) { 13; if (count > 0) { // At runtime, 'count' is 0, so the following code is dead. 14; int result = val; 15; int tmp = count; 16; 17; while (tmp < 8) { 18; result += val >> tmp; 19; tmp += count; 20; } 21; 22; ptr[inx++] = (unsigned char) result; 23; } 24; } 25; } 26; 27; No explicit division appears in the input, but a division is generated during 28; vectorization, and that division is a division-by-0 when the input 'count' 29; is 0, so it cannot be hoisted above the guard of 'count > 0'. 30 31; Verify that a 'udiv' does not appear in the 'loop1.preheader' block, and that 32; a 'udiv' has been inserted at the top of the 'while.body.preheader' block. 33define void @testCountIncrLoop(ptr %ptr, i32 %lim, i32 %count, i32 %val) mustprogress { 34; CHECK-LABEL: @testCountIncrLoop( 35; CHECK-NEXT: entry: 36; CHECK: loop1.preheader: 37; CHECK-NOT: udiv 38; CHECK: loop1.body: 39; CHECK: while.cond.preheader: 40; CHECK: while.body.preheader: 41; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0:%.*]], [[COUNT:%.*]] 42; CHECK: vector.ph: 43; CHECK: exit: 44; CHECK: ret void 45; 46entry: 47 %cmp1 = icmp sgt i32 %lim, 0 48 br i1 %cmp1, label %loop1.preheader, label %exit 49 50loop1.preheader: ; preds = %entry 51 %cmp2 = icmp sgt i32 %count, 0 52 %cmp4 = icmp slt i32 %count, 8 53 br label %loop1.body 54 55loop1.body: ; preds = %loop1.inc, %loop1.preheader 56 %outer_i = phi i32 [ 0, %loop1.preheader ], [ %outer_i.1, %loop1.inc ] 57 %inx.1 = phi i32 [ 0, %loop1.preheader ], [ %inx.2, %loop1.inc ] 58 br i1 %cmp2, label %while.cond.preheader, label %loop1.inc 59 60while.cond.preheader: ; preds = %loop1.body 61 br i1 %cmp4, label %while.body, label %while.end 62 63while.body: ; preds = %while.body, %while.cond.preheader 64 %tmp = phi i32 [ %add3, %while.body ], [ %count, %while.cond.preheader ] 65 %result.1 = phi i32 [ %add, %while.body ], [ %val, %while.cond.preheader ] 66 %shr = ashr i32 %val, %tmp 67 %add = add nsw i32 %shr, %result.1 68 %add3 = add nsw i32 %tmp, %count 69 %cmp3 = icmp slt i32 %add3, 8 70 br i1 %cmp3, label %while.body, label %while.end 71 72while.end: ; preds = %while.body, %while.cond.preheader 73 %result.0.lcssa = phi i32 [ %val, %while.cond.preheader ], [ %add, %while.body ] 74 %conv = trunc i32 %result.0.lcssa to i8 75 %inc = add nsw i32 %inx.1, 1 76 %idxprom = sext i32 %inx.1 to i64 77 %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom 78 store i8 %conv, ptr %arrayidx, align 1 79 br label %loop1.inc 80 81loop1.inc: ; preds = %while.end, %loop1.body 82 %inx.2 = phi i32 [ %inc, %while.end ], [ %inx.1, %loop1.body ] 83 %outer_i.1 = add nuw nsw i32 %outer_i, 1 84 %exitcond = icmp eq i32 %outer_i.1, %lim 85 br i1 %exitcond, label %exit, label %loop1.body 86 87exit: ; preds = %loop1.inc, %entry 88 ret void 89} 90 91; These next tests are all based on the following source code, with slight 92; variations on the calculation of 'incr' (all of which are loop-invariant 93; divisions, but only some of which can be safely hoisted): 94; 95; uint32_t foo(uint32_t *ptr, uint32_t start1, uint32_t start2) { 96; uint32_t counter1, counter2; 97; uint32_t val = start1; 98; for (counter1 = 1; counter1 < 100; ++counter1) { 99; uint32_t index = 0; 100; val += ptr[index]; 101; for (counter2 = start2; counter2 < 10; ++counter2) { 102; // Division is loop invariant, and denominator is guaranteed non-zero: 103; // Safe to hoist it out of the inner loop. 104; uint32_t incr = 16 / counter1; 105; index += incr; 106; val += ptr[index]; 107; } 108; } 109; return val; 110; } 111 112; This version is as written above, where 'incr' is '16/counter1', and it is 113; guaranted that 'counter1' is always non-zero. So it is safe to hoist the 114; division from the inner loop to the preheader. 115; 116; Verify that the 'udiv' is hoisted to the preheader, and is not in the loop body. 117define i32 @NonZeroDivHoist(ptr nocapture readonly %ptr, i32 %start1, i32 %start2) { 118; INDVARCHECK-LABEL: @NonZeroDivHoist( 119; INDVARCHECK-NEXT: entry: 120; INDVARCHECK: for.body3.lr.ph: 121; INDVARCHECK-NEXT: [[TMP0:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]] 122; INDVARCHECK-NEXT: br label [[FOR_BODY3:%.*]] 123; INDVARCHECK: for.body3: 124; INDVARCHECK-NOT: udiv 125; INDVARCHECK: for.end10: 126; 127entry: 128 br label %for.cond 129 130for.cond: ; preds = %for.end, %entry 131 %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ] 132 %counter1.0 = phi i32 [ 1, %entry ], [ %inc9, %for.end ] 133 %cmp = icmp ult i32 %counter1.0, 100 134 br i1 %cmp, label %for.body, label %for.end10 135 136for.body: ; preds = %for.cond 137 %tmp = load i32, ptr %ptr, align 4 138 %add = add i32 %tmp, %val.0 139 %cmp224 = icmp ult i32 %start2, 10 140 br i1 %cmp224, label %for.body3.lr.ph, label %for.end 141 142for.body3.lr.ph: ; preds = %for.body 143 br label %for.body3 144 145for.body3: ; preds = %for.body3, %for.body3.lr.ph 146 %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ] 147 %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ] 148 %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ] 149 %div = udiv i32 16, %counter1.0 150 %add4 = add i32 %div, %index.027 151 %idxprom5 = zext i32 %add4 to i64 152 %arrayidx6 = getelementptr inbounds i32, ptr %ptr, i64 %idxprom5 153 %tmp1 = load i32, ptr %arrayidx6, align 4 154 %add7 = add i32 %tmp1, %val.126 155 %inc = add i32 %counter2.025, 1 156 %cmp2 = icmp ult i32 %inc, 10 157 br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge 158 159for.cond1.for.end_crit_edge: ; preds = %for.body3 160 %split = phi i32 [ %add7, %for.body3 ] 161 br label %for.end 162 163for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body 164 %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ] 165 %inc9 = add i32 %counter1.0, 1 166 br label %for.cond 167 168for.end10: ; preds = %for.cond 169 %val.0.lcssa = phi i32 [ %val.0, %for.cond ] 170 ret i32 %val.0.lcssa 171} 172 173; This version is identical to the above 'NonZeroDivHoist' case, except the 174; outer ('counter1') loop starts at the unknown value of 'start1' rather than 1, 175; and so it is illegal to hoist the division because if 'start1' is 0, hoisting 176; it would incorrectly cause a divide-by-zero trap. 177; 178; Verify that the 'udiv' is not hoisted to the preheader, and it remains in the 179; loop body. 180define i32 @ZeroDivNoHoist(ptr nocapture readonly %ptr, i32 %start1, i32 %start2) { 181; INDVARCHECK-LABEL: @ZeroDivNoHoist( 182; INDVARCHECK-NEXT: entry: 183; INDVARCHECK-NOT: udiv 184; INDVARCHECK: for.body3: 185; INDVARCHECK: [[TMP1:%.*]] = udiv i64 16, [[INDVARS_IV:%.*]] 186; INDVARCHECK: for.cond1.for.end_crit_edge: 187; 188entry: 189 br label %for.cond 190 191for.cond: ; preds = %for.end, %entry 192 %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ] 193 %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ] 194 %cmp = icmp ult i32 %counter1.0, 100 195 br i1 %cmp, label %for.body, label %for.end10 196 197for.body: ; preds = %for.cond 198 %tmp = load i32, ptr %ptr, align 4 199 %add = add i32 %tmp, %val.0 200 %cmp224 = icmp ult i32 %start2, 10 201 br i1 %cmp224, label %for.body3.lr.ph, label %for.end 202 203for.body3.lr.ph: ; preds = %for.body 204 br label %for.body3 205 206for.body3: ; preds = %for.body3, %for.body3.lr.ph 207 %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ] 208 %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ] 209 %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ] 210 %div = udiv i32 16, %counter1.0 211 %add4 = add i32 %div, %index.027 212 %idxprom5 = zext i32 %add4 to i64 213 %arrayidx6 = getelementptr inbounds i32, ptr %ptr, i64 %idxprom5 214 %tmp1 = load i32, ptr %arrayidx6, align 4 215 %add7 = add i32 %tmp1, %val.126 216 %inc = add i32 %counter2.025, 1 217 %cmp2 = icmp ult i32 %inc, 10 218 br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge 219 220for.cond1.for.end_crit_edge: ; preds = %for.body3 221 %split = phi i32 [ %add7, %for.body3 ] 222 br label %for.end 223 224for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body 225 %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ] 226 %inc9 = add i32 %counter1.0, 1 227 br label %for.cond 228 229for.end10: ; preds = %for.cond 230 %val.0.lcssa = phi i32 [ %val.0, %for.cond ] 231 ret i32 %val.0.lcssa 232} 233 234; This version is has a clearly safe division by a non-zero constant (16). The 235; division is transformed to a logical-shift-right of 4, and it is safely 236; hoisted. 237; 238; Verify that the division-operation is hoisted, and that it appears as a 239; right-shift ('lshr') rather than an explicit division. 240define i32 @DivBy16Hoist(ptr nocapture readonly %ptr, i32 %start1, i32 %start2) { 241; INDVARCHECK-LABEL: @DivBy16Hoist( 242; INDVARCHECK-NEXT: entry: 243; INDVARCHECK: for.cond: 244; INDVARCHECK: [[TMP1:%.*]] = lshr i64 [[INDVARS_IV:%.*]], 4 245; INDVARCHECK: for.body: 246; INDVARCHECK-NOT: lshr 247; INDVARCHECK-NOT: udiv 248; INDVARCHECK: for.end10: 249; 250entry: 251 br label %for.cond 252 253for.cond: ; preds = %for.end, %entry 254 %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ] 255 %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ] 256 %cmp = icmp ult i32 %counter1.0, 100 257 br i1 %cmp, label %for.body, label %for.end10 258 259for.body: ; preds = %for.cond 260 %tmp = load i32, ptr %ptr, align 4 261 %add = add i32 %tmp, %val.0 262 %cmp224 = icmp ult i32 %start2, 10 263 br i1 %cmp224, label %for.body3.lr.ph, label %for.end 264 265for.body3.lr.ph: ; preds = %for.body 266 br label %for.body3 267 268for.body3: ; preds = %for.body3, %for.body3.lr.ph 269 %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ] 270 %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ] 271 %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ] 272 %div = udiv i32 %counter1.0, 16 273 %add4 = add i32 %div, %index.027 274 %idxprom5 = zext i32 %add4 to i64 275 %arrayidx6 = getelementptr inbounds i32, ptr %ptr, i64 %idxprom5 276 %tmp1 = load i32, ptr %arrayidx6, align 4 277 %add7 = add i32 %tmp1, %val.126 278 %inc = add i32 %counter2.025, 1 279 %cmp2 = icmp ult i32 %inc, 10 280 br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge 281 282for.cond1.for.end_crit_edge: ; preds = %for.body3 283 %split = phi i32 [ %add7, %for.body3 ] 284 br label %for.end 285 286for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body 287 %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ] 288 %inc9 = add i32 %counter1.0, 1 289 br label %for.cond 290 291for.end10: ; preds = %for.cond 292 %val.0.lcssa = phi i32 [ %val.0, %for.cond ] 293 ret i32 %val.0.lcssa 294} 295 296; This version is has a clearly safe division by a non-zero constant (17). The 297; division is safely hoisted, as it was in the 'DivBy16Hoist' verison, but here 298; it remains a division, rather than being transformed to a right-shift. 299; 300; Verify that the division-operation is hoisted. 301define i32 @DivBy17Hoist(ptr nocapture readonly %ptr, i32 %start1, i32 %start2) { 302; INDVARCHECK-LABEL: @DivBy17Hoist( 303; INDVARCHECK-NEXT: entry: 304; INDVARCHECK: for.cond: 305; INDVARCHECK: [[TMP1:%.*]] = udiv i64 [[INDVARS_IV:%.*]], 17 306; INDVARCHECK: for.body: 307; INDVARCHECK-NOT: udiv 308; INDVARCHECK: for.end10: 309; 310entry: 311 br label %for.cond 312 313for.cond: ; preds = %for.end, %entry 314 %val.0 = phi i32 [ %start1, %entry ], [ %val.1.lcssa, %for.end ] 315 %counter1.0 = phi i32 [ %start1, %entry ], [ %inc9, %for.end ] 316 %cmp = icmp ult i32 %counter1.0, 100 317 br i1 %cmp, label %for.body, label %for.end10 318 319for.body: ; preds = %for.cond 320 %tmp = load i32, ptr %ptr, align 4 321 %add = add i32 %tmp, %val.0 322 %cmp224 = icmp ult i32 %start2, 10 323 br i1 %cmp224, label %for.body3.lr.ph, label %for.end 324 325for.body3.lr.ph: ; preds = %for.body 326 br label %for.body3 327 328for.body3: ; preds = %for.body3, %for.body3.lr.ph 329 %index.027 = phi i32 [ 0, %for.body3.lr.ph ], [ %add4, %for.body3 ] 330 %val.126 = phi i32 [ %add, %for.body3.lr.ph ], [ %add7, %for.body3 ] 331 %counter2.025 = phi i32 [ %start2, %for.body3.lr.ph ], [ %inc, %for.body3 ] 332 %div = udiv i32 %counter1.0, 17 333 %add4 = add i32 %div, %index.027 334 %idxprom5 = zext i32 %add4 to i64 335 %arrayidx6 = getelementptr inbounds i32, ptr %ptr, i64 %idxprom5 336 %tmp1 = load i32, ptr %arrayidx6, align 4 337 %add7 = add i32 %tmp1, %val.126 338 %inc = add i32 %counter2.025, 1 339 %cmp2 = icmp ult i32 %inc, 10 340 br i1 %cmp2, label %for.body3, label %for.cond1.for.end_crit_edge 341 342for.cond1.for.end_crit_edge: ; preds = %for.body3 343 %split = phi i32 [ %add7, %for.body3 ] 344 br label %for.end 345 346for.end: ; preds = %for.cond1.for.end_crit_edge, %for.body 347 %val.1.lcssa = phi i32 [ %split, %for.cond1.for.end_crit_edge ], [ %add, %for.body ] 348 %inc9 = add i32 %counter1.0, 1 349 br label %for.cond 350 351for.end10: ; preds = %for.cond 352 %val.0.lcssa = phi i32 [ %val.0, %for.cond ] 353 ret i32 %val.0.lcssa 354} 355