1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -passes=indvars -S -indvars-predicate-loops=0 | FileCheck %s 3 4; Make sure that indvars can perform LFTR without a canonical IV. 5 6target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" 7 8; Perform LFTR using the original pointer-type IV. 9 10declare void @use(double %x) 11 12; for(char* p = base; p < base + n; ++p) { 13; *p = p-base; 14; } 15define void @ptriv(ptr %base, i32 %n) nounwind { 16; CHECK-LABEL: @ptriv( 17; CHECK-NEXT: entry: 18; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[N:%.*]] to i64 19; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[BASE:%.*]], i64 [[IDX_EXT]] 20; CHECK-NEXT: [[CMP1:%.*]] = icmp ult ptr [[BASE]], [[ADD_PTR]] 21; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 22; CHECK: for.body.preheader: 23; CHECK-NEXT: br label [[FOR_BODY:%.*]] 24; CHECK: for.body: 25; CHECK-NEXT: [[P_02:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[BASE]], [[FOR_BODY_PREHEADER]] ] 26; CHECK-NEXT: [[SUB_PTR_LHS_CAST:%.*]] = ptrtoint ptr [[P_02]] to i64 27; CHECK-NEXT: [[SUB_PTR_RHS_CAST:%.*]] = ptrtoint ptr [[BASE]] to i64 28; CHECK-NEXT: [[SUB_PTR_SUB:%.*]] = sub i64 [[SUB_PTR_LHS_CAST]], [[SUB_PTR_RHS_CAST]] 29; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[SUB_PTR_SUB]] to i8 30; CHECK-NEXT: store i8 [[CONV]], ptr [[P_02]], align 1 31; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[P_02]], i32 1 32; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[INCDEC_PTR]], [[ADD_PTR]] 33; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]] 34; CHECK: for.end.loopexit: 35; CHECK-NEXT: br label [[FOR_END]] 36; CHECK: for.end: 37; CHECK-NEXT: ret void 38; 39entry: 40 %idx.ext = sext i32 %n to i64 41 %add.ptr = getelementptr inbounds i8, ptr %base, i64 %idx.ext 42 %cmp1 = icmp ult ptr %base, %add.ptr 43 br i1 %cmp1, label %for.body, label %for.end 44 45for.body: 46 %p.02 = phi ptr [ %base, %entry ], [ %incdec.ptr, %for.body ] 47 ; cruft to make the IV useful 48 %sub.ptr.lhs.cast = ptrtoint ptr %p.02 to i64 49 %sub.ptr.rhs.cast = ptrtoint ptr %base to i64 50 %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast 51 %conv = trunc i64 %sub.ptr.sub to i8 52 store i8 %conv, ptr %p.02 53 %incdec.ptr = getelementptr inbounds i8, ptr %p.02, i32 1 54 %cmp = icmp ult ptr %incdec.ptr, %add.ptr 55 br i1 %cmp, label %for.body, label %for.end 56 57for.end: 58 ret void 59} 60 61; This test checks that SCEVExpander can handle an outer loop that has been 62; simplified, and as a result the inner loop's exit test will be rewritten. 63define void @expandOuterRecurrence(i32 %arg) nounwind { 64; CHECK-LABEL: @expandOuterRecurrence( 65; CHECK-NEXT: entry: 66; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 67; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] 68; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] 69; CHECK: outer.preheader: 70; CHECK-NEXT: br label [[OUTER:%.*]] 71; CHECK: outer: 72; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[SUB1]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] 73; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] 74; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] 75; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 76; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]] 77; CHECK-NEXT: br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]] 78; CHECK: inner.ph: 79; CHECK-NEXT: br label [[INNER:%.*]] 80; CHECK: inner: 81; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[INNER_PH]] ], [ [[J_INC:%.*]], [[INNER]] ] 82; CHECK-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 83; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[J_INC]], [[INDVARS_IV]] 84; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]] 85; CHECK: outer.inc.loopexit: 86; CHECK-NEXT: br label [[OUTER_INC]] 87; CHECK: outer.inc: 88; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 89; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 90; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] 91; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] 92; CHECK: exit.loopexit: 93; CHECK-NEXT: br label [[EXIT]] 94; CHECK: exit: 95; CHECK-NEXT: ret void 96; 97entry: 98 %sub1 = sub nsw i32 %arg, 1 99 %cmp1 = icmp slt i32 0, %sub1 100 br i1 %cmp1, label %outer, label %exit 101 102outer: 103 %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ] 104 %sub2 = sub nsw i32 %arg, %i 105 %sub3 = sub nsw i32 %sub2, 1 106 %cmp2 = icmp slt i32 0, %sub3 107 br i1 %cmp2, label %inner.ph, label %outer.inc 108 109inner.ph: 110 br label %inner 111 112inner: 113 %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ] 114 %j.inc = add nsw i32 %j, 1 115 %cmp3 = icmp slt i32 %j.inc, %sub3 116 br i1 %cmp3, label %inner, label %outer.inc 117 118outer.inc: 119 %i.inc = add nsw i32 %i, 1 120 %cmp4 = icmp slt i32 %i.inc, %sub1 121 br i1 %cmp4, label %outer, label %exit 122 123exit: 124 ret void 125} 126 127; Force SCEVExpander to look for an existing well-formed phi. 128; Perform LFTR without generating extra preheader code. 129; TODO: Recover regression after not strengthening AddRec flags during 130; get[Sign,Zero]ExtendExpr for INDVARS_IV_NEXT. 131define void @guardedloop(ptr %matrix, ptr %vector, 132; 133; CHECK-LABEL: @guardedloop( 134; CHECK-NEXT: entry: 135; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[IROW:%.*]] 136; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[RETURN:%.*]] 137; CHECK: loop.preheader: 138; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[ILEAD:%.*]] to i64 139; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[IROW]] to i64 140; CHECK-NEXT: br label [[LOOP:%.*]] 141; CHECK: loop: 142; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[LOOP]] ] 143; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] 144; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], [[INDVARS_IV2]] 145; CHECK-NEXT: [[MATRIXP:%.*]] = getelementptr inbounds [0 x double], ptr [[MATRIX:%.*]], i32 0, i64 [[TMP1]] 146; CHECK-NEXT: [[V1:%.*]] = load double, ptr [[MATRIXP]], align 8 147; CHECK-NEXT: call void @use(double [[V1]]) 148; CHECK-NEXT: [[VECTORP:%.*]] = getelementptr inbounds [0 x double], ptr [[VECTOR:%.*]], i32 0, i64 [[INDVARS_IV2]] 149; CHECK-NEXT: [[V2:%.*]] = load double, ptr [[VECTORP]], align 8 150; CHECK-NEXT: call void @use(double [[V2]]) 151; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP0]] 152; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 153; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT]] 154; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[RETURN_LOOPEXIT:%.*]] 155; CHECK: return.loopexit: 156; CHECK-NEXT: br label [[RETURN]] 157; CHECK: return: 158; CHECK-NEXT: ret void 159; 160 i32 %irow, i32 %ilead) nounwind { 161entry: 162 %cmp = icmp slt i32 1, %irow 163 br i1 %cmp, label %loop, label %return 164 165loop: 166 %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ] 167 %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ] 168 %diagidx = add nsw i32 %rowidx, %i 169 %diagidxw = sext i32 %diagidx to i64 170 %matrixp = getelementptr inbounds [0 x double], ptr %matrix, i32 0, i64 %diagidxw 171 %v1 = load double, ptr %matrixp 172 call void @use(double %v1) 173 %iw = sext i32 %i to i64 174 %vectorp = getelementptr inbounds [0 x double], ptr %vector, i32 0, i64 %iw 175 %v2 = load double, ptr %vectorp 176 call void @use(double %v2) 177 %row.inc = add nsw i32 %rowidx, %ilead 178 %i.inc = add nsw i32 %i, 1 179 %cmp196 = icmp slt i32 %i.inc, %irow 180 br i1 %cmp196, label %loop, label %return 181 182return: 183 ret void 184} 185 186; Avoid generating extra code to materialize a trip count. Skip LFTR. 187define void @unguardedloop(ptr %matrix, ptr %vector, 188; 189; CHECK-LABEL: @unguardedloop( 190; CHECK-NEXT: entry: 191; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[IROW:%.*]], i32 1) 192; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64 193; CHECK-NEXT: br label [[LOOP:%.*]] 194; CHECK: loop: 195; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] 196; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1 197; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], [[WIDE_TRIP_COUNT]] 198; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[RETURN:%.*]] 199; CHECK: return: 200; CHECK-NEXT: ret void 201; 202 i32 %irow, i32 %ilead) nounwind { 203entry: 204 br label %loop 205 206loop: 207 %rowidx = phi i32 [ 0, %entry ], [ %row.inc, %loop ] 208 %i = phi i32 [ 0, %entry ], [ %i.inc, %loop ] 209 %diagidx = add nsw i32 %rowidx, %i 210 %diagidxw = sext i32 %diagidx to i64 211 %matrixp = getelementptr inbounds [0 x double], ptr %matrix, i32 0, i64 %diagidxw 212 %v1 = load double, ptr %matrixp 213 %iw = sext i32 %i to i64 214 %vectorp = getelementptr inbounds [0 x double], ptr %vector, i32 0, i64 %iw 215 %v2 = load double, ptr %vectorp 216 %row.inc = add nsw i32 %rowidx, %ilead 217 %i.inc = add nsw i32 %i, 1 218 %cmp196 = icmp slt i32 %i.inc, %irow 219 br i1 %cmp196, label %loop, label %return 220 221return: 222 ret void 223} 224 225; Remove %i which is only used by the exit test. 226; Verify that SCEV can still compute a backedge count from the sign 227; extended %n, used for pointer comparison by LFTR. 228; 229; TODO: Fix for PR13371 currently makes this impossible. See 230; IndVarSimplify.cpp hasConcreteDef(). We may want to change to undef rules. 231define void @geplftr(ptr %base, i32 %x, i32 %y, i32 %n) nounwind { 232; CHECK-LABEL: @geplftr( 233; CHECK-NEXT: entry: 234; CHECK-NEXT: [[X_EXT:%.*]] = sext i32 [[X:%.*]] to i64 235; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[BASE:%.*]], i64 [[X_EXT]] 236; CHECK-NEXT: [[Y_EXT:%.*]] = sext i32 [[Y:%.*]] to i64 237; CHECK-NEXT: [[ADD_PTR10:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 [[Y_EXT]] 238; CHECK-NEXT: [[LIM:%.*]] = add i32 [[X]], [[N:%.*]] 239; CHECK-NEXT: [[CMP_PH:%.*]] = icmp ult i32 [[X]], [[LIM]] 240; CHECK-NEXT: br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 241; CHECK: loop.preheader: 242; CHECK-NEXT: br label [[LOOP:%.*]] 243; CHECK: loop: 244; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC:%.*]], [[LOOP]] ], [ [[X]], [[LOOP_PREHEADER]] ] 245; CHECK-NEXT: [[APTR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[ADD_PTR10]], [[LOOP_PREHEADER]] ] 246; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[APTR]], i32 1 247; CHECK-NEXT: store i8 3, ptr [[APTR]], align 1 248; CHECK-NEXT: [[INC]] = add nuw i32 [[I]], 1 249; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], [[LIM]] 250; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] 251; CHECK: exit.loopexit: 252; CHECK-NEXT: br label [[EXIT]] 253; CHECK: exit: 254; CHECK-NEXT: ret void 255; 256entry: 257 %x.ext = sext i32 %x to i64 258 %add.ptr = getelementptr inbounds i8, ptr %base, i64 %x.ext 259 %y.ext = sext i32 %y to i64 260 %add.ptr10 = getelementptr inbounds i8, ptr %add.ptr, i64 %y.ext 261 %lim = add i32 %x, %n 262 %cmp.ph = icmp ult i32 %x, %lim 263 br i1 %cmp.ph, label %loop, label %exit 264loop: 265 %i = phi i32 [ %x, %entry ], [ %inc, %loop ] 266 %aptr = phi ptr [ %add.ptr10, %entry ], [ %incdec.ptr, %loop ] 267 %incdec.ptr = getelementptr inbounds i8, ptr %aptr, i32 1 268 store i8 3, ptr %aptr 269 %inc = add i32 %i, 1 270 %cmp = icmp ult i32 %inc, %lim 271 br i1 %cmp, label %loop, label %exit 272 273exit: 274 ret void 275} 276 277; Exercise backedge taken count verification with a never-taken loop. 278define void @nevertaken() nounwind uwtable ssp { 279; CHECK-LABEL: @nevertaken( 280; CHECK-NEXT: entry: 281; CHECK-NEXT: br label [[LOOP:%.*]] 282; CHECK: loop: 283; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] 284; CHECK: exit: 285; CHECK-NEXT: ret void 286; 287entry: 288 br label %loop 289loop: 290 %i = phi i32 [ 0, %entry ], [ %inc, %loop ] 291 %inc = add nsw i32 %i, 1 292 %cmp = icmp sle i32 %inc, 0 293 br i1 %cmp, label %loop, label %exit 294 295exit: 296 ret void 297} 298 299; Test LFTR on an IV whose recurrence start is a non-unit pointer type. 300define void @aryptriv(ptr %base, i32 %n) nounwind { 301; CHECK-LABEL: @aryptriv( 302; CHECK-NEXT: entry: 303; CHECK-NEXT: [[IVEND:%.*]] = getelementptr inbounds [256 x i8], ptr [[BASE:%.*]], i32 0, i32 [[N:%.*]] 304; CHECK-NEXT: [[CMP_PH:%.*]] = icmp ult ptr [[BASE]], [[IVEND]] 305; CHECK-NEXT: br i1 [[CMP_PH]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] 306; CHECK: loop.preheader: 307; CHECK-NEXT: br label [[LOOP:%.*]] 308; CHECK: loop: 309; CHECK-NEXT: [[APTR:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[LOOP]] ], [ [[BASE]], [[LOOP_PREHEADER]] ] 310; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[APTR]], i32 1 311; CHECK-NEXT: store i8 3, ptr [[APTR]], align 1 312; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne ptr [[INCDEC_PTR]], [[IVEND]] 313; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] 314; CHECK: exit.loopexit: 315; CHECK-NEXT: br label [[EXIT]] 316; CHECK: exit: 317; CHECK-NEXT: ret void 318; 319entry: 320 %ivend = getelementptr inbounds [256 x i8], ptr %base, i32 0, i32 %n 321 %cmp.ph = icmp ult ptr %base, %ivend 322 br i1 %cmp.ph, label %loop, label %exit 323 324loop: 325 %aptr = phi ptr [ %base, %entry ], [ %incdec.ptr, %loop ] 326 %incdec.ptr = getelementptr inbounds i8, ptr %aptr, i32 1 327 store i8 3, ptr %aptr 328 %cmp = icmp ult ptr %incdec.ptr, %ivend 329 br i1 %cmp, label %loop, label %exit 330 331exit: 332 ret void 333} 334