; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=indvars -S | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; IV with constant start, preinc and postinc sign extends, with and without NSW.
; IV rewrite only removes one sext. WidenIVs removes all three.
;
; Input shape: two i32 IVs (%iv and %ivnsw) both start at 0 and step by 1; only
; the %ivnsw increment carries nsw. Each of the three stores addresses %base
; through a sext of an i32 IV value (pre-increment, post-increment, and
; post-increment with nsw).
; Expected output (per the CHECK lines): a single i64 phi feeds all three GEPs,
; no sext is left in the loop, and the exit test is an `icmp ne` of the widened
; post-increment value against zext(smax(%limit, 0) + 1).
define void @postincConstIV(ptr %base, i32 %limit) nounwind {
; CHECK-LABEL: @postincConstIV(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 0)
; CHECK-NEXT: [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[PREADR:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[PREADR]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[POSTADR:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 0, ptr [[POSTADR]], align 1
; CHECK-NEXT: [[POSTADRNSW:%.*]] = getelementptr inbounds i8, ptr [[BASE]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 0, ptr [[POSTADRNSW]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: return:
; CHECK-NEXT: ret void
;
entry:
  br label %loop
loop:
  %iv = phi i32 [ %postiv, %loop ], [ 0, %entry ]
  %ivnsw = phi i32 [ %postivnsw, %loop ], [ 0, %entry ]
; Pre-increment use: sext of the IV itself.
  %preofs = sext i32 %iv to i64
  %preadr = getelementptr i8, ptr %base, i64 %preofs
  store i8 0, ptr %preadr
; Post-increment use, increment has no wrap flags.
  %postiv = add i32 %iv, 1
  %postofs = sext i32 %postiv to i64
  %postadr = getelementptr i8, ptr %base, i64 %postofs
  store i8 0, ptr %postadr
; Post-increment use, increment is nsw and the GEP is inbounds.
  %postivnsw = add nsw i32 %ivnsw, 1
  %postofsnsw = sext i32 %postivnsw to i64
  %postadrnsw = getelementptr inbounds i8, ptr %base, i64 %postofsnsw
  store i8 0, ptr %postadrnsw
; The backedge test uses the pre-increment value %iv (contrast with
; @postincVarIV, which tests %postiv).
  %cond = icmp sgt i32 %limit, %iv
  br i1 %cond, label %loop, label %exit
exit:
  br label %return
return:
  ret void
}

; IV with nonconstant start, preinc and postinc sign extends,
; with and without NSW.
; As with postincConstIV, WidenIVs removes all three sexts.
;
; Input shape: same three sext'd address computations as @postincConstIV, but
; both IVs start at %init, the loop is guarded by %precond, and the backedge
; test uses the post-increment value %postiv.
; Expected output (per the CHECK lines): %init and %limit are sign-extended
; once in loop.preheader, the widened increment is `add nsw` only (no nuw), and
; the exit test is an `icmp ne` against the sign-extended %limit.
define void @postincVarIV(ptr %base, i32 %init, i32 %limit) nounwind {
; CHECK-LABEL: @postincVarIV(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[PRECOND:%.*]] = icmp sgt i32 [[LIMIT:%.*]], [[INIT:%.*]]
; CHECK-NEXT: br i1 [[PRECOND]], label [[LOOP_PREHEADER:%.*]], label [[RETURN:%.*]]
; CHECK: loop.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INIT]] to i64
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[LIMIT]] to i64
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[LOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[PREADR:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[PREADR]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[POSTADR:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 0, ptr [[POSTADR]], align 1
; CHECK-NEXT: [[POSTADRNSW:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 0, ptr [[POSTADRNSW]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: br label [[RETURN]]
; CHECK: return:
; CHECK-NEXT: ret void
;
entry:
; Guard: the loop is entered only when %limit > %init (signed).
  %precond = icmp sgt i32 %limit, %init
  br i1 %precond, label %loop, label %return
loop:
  %iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]
  %ivnsw = phi i32 [ %postivnsw, %loop ], [ %init, %entry ]
; Pre-increment use: sext of the IV itself.
  %preofs = sext i32 %iv to i64
  %preadr = getelementptr i8, ptr %base, i64 %preofs
  store i8 0, ptr %preadr
; Post-increment use, increment has no wrap flags.
  %postiv = add i32 %iv, 1
  %postofs = sext i32 %postiv to i64
  %postadr = getelementptr i8, ptr %base, i64 %postofs
  store i8 0, ptr %postadr
; Post-increment use, increment is nsw. Unlike @postincConstIV, this GEP is
; not inbounds.
  %postivnsw = add nsw i32 %ivnsw, 1
  %postofsnsw = sext i32 %postivnsw to i64
  %postadrnsw = getelementptr i8, ptr %base, i64 %postofsnsw
  store i8 0, ptr %postadrnsw
; The backedge test uses the post-increment value %postiv.
  %cond = icmp sgt i32 %limit, %postiv
  br i1 %cond, label %loop, label %exit
exit:
  br label %return
return:
  ret void
}

; Test sign extend elimination in the inner and outer loop.
; %outercount is straightforward to widen, besides being in an outer loop.
; %innercount is currently blocked by lcssa, so is not widened.
; %inneriv can be widened only after proving it has no signed-overflow
; based on the loop test.
define void @nestedIV(ptr %address, i32 %limit) nounwind {
; CHECK-LABEL: @nestedIV(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[LIMITDEC:%.*]] = add i32 [[LIMIT:%.*]], -1
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[LIMITDEC]] to i64
; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT]], i32 1)
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[SMAX]] to i64
; CHECK-NEXT: br label [[OUTERLOOP:%.*]]
; CHECK: outerloop:
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT2:%.*]], [[OUTERMERGE:%.*]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[INNERCOUNT:%.*]] = phi i32 [ [[INNERCOUNT_MERGE:%.*]], [[OUTERMERGE]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV1]], -1
; CHECK-NEXT: [[ADR1:%.*]] = getelementptr i8, ptr [[ADDRESS:%.*]], i64 [[TMP1]]
; CHECK-NEXT: store i8 0, ptr [[ADR1]], align 1
; CHECK-NEXT: br label [[INNERPREHEADER:%.*]]
; CHECK: innerpreheader:
; CHECK-NEXT: [[INNERPRECMP:%.*]] = icmp sgt i32 [[LIMITDEC]], [[INNERCOUNT]]
; CHECK-NEXT: br i1 [[INNERPRECMP]], label [[INNERLOOP_PREHEADER:%.*]], label [[OUTERMERGE]]
; CHECK: innerloop.preheader:
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[INNERCOUNT]] to i64
; CHECK-NEXT: br label [[INNERLOOP:%.*]]
; CHECK: innerloop:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP2]], [[INNERLOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[INNERLOOP]] ]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[ADR2:%.*]] = getelementptr i8, ptr [[ADDRESS]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ADR2]], align 1
; CHECK-NEXT: [[ADR3:%.*]] = getelementptr i8, ptr [[ADDRESS]], i64 [[INDVARS_IV_NEXT]]
; CHECK-NEXT: store i8 0, ptr [[ADR3]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
; CHECK: innerexit:
; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
; CHECK-NEXT: [[TMP3:%.*]] = trunc nsw i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
; CHECK-NEXT: br label [[OUTERMERGE]]
; CHECK: outermerge:
; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP3]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ]
; CHECK-NEXT: [[ADR4:%.*]] = getelementptr i8, ptr [[ADDRESS]], i64 [[INDVARS_IV1]]
; CHECK-NEXT: store i8 0, ptr [[ADR4]], align 1
; CHECK-NEXT: [[OFS5:%.*]] = sext i32 [[INNERCOUNT_MERGE]] to i64
; CHECK-NEXT: [[ADR5:%.*]] = getelementptr i8, ptr [[ADDRESS]], i64 [[OFS5]]
; CHECK-NEXT: store i8 0, ptr [[ADR5]], align 1
; CHECK-NEXT: [[INDVARS_IV_NEXT2]] = add nuw nsw i64 [[INDVARS_IV1]], 1
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT2]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND4]], label [[OUTERLOOP]], label [[RETURN:%.*]]
; CHECK: return:
; CHECK-NEXT: ret void
;
entry:
; %limitdec (= %limit - 1) is the signed bound used by both the inner-loop
; guard and the inner-loop backedge test.
  %limitdec = add i32 %limit, -1
  br label %outerloop

; Eliminate %ofs1 after widening outercount.
; IV rewriting hoists a gep into this block. We don't like that.
; %outercount counts outer iterations from 0; %innercount is carried across
; outer iterations through %innercount.merge. The CHECK lines expect
; %outercount to become an i64 phi while %innercount stays an i32 phi.
outerloop:
  %outercount = phi i32 [ %outerpostcount, %outermerge ], [ 0, %entry ]
  %innercount = phi i32 [ %innercount.merge, %outermerge ], [ 0, %entry ]

  %outercountdec = add i32 %outercount, -1
  %ofs1 = sext i32 %outercountdec to i64
  %adr1 = getelementptr i8, ptr %address, i64 %ofs1
  store i8 0, ptr %adr1

  br label %innerpreheader

; Guard: the inner loop is skipped unless %limitdec > %innercount (signed).
innerpreheader:
  %innerprecmp = icmp sgt i32 %limitdec, %innercount
  br i1 %innerprecmp, label %innerloop, label %outermerge

; Eliminate %ofs2 after widening inneriv.
; Eliminate %ofs3 after normalizing sext(innerpostiv)
; FIXME: We should check that indvars does not increase the number of
; IVs in this loop. sext elimination plus LFTR currently results in 2 final
; IVs. Waiting to remove LFTR.
; NOTE(review): the CHECK lines for innerloop show a single i64 phi, so the
; "2 final IVs" statement above may be stale -- confirm before relying on it.
innerloop:
  %inneriv = phi i32 [ %innerpostiv, %innerloop ], [ %innercount, %innerpreheader ]
  %innerpostiv = add i32 %inneriv, 1

  %ofs2 = sext i32 %inneriv to i64
  %adr2 = getelementptr i8, ptr %address, i64 %ofs2
  store i8 0, ptr %adr2

  %ofs3 = sext i32 %innerpostiv to i64
  %adr3 = getelementptr i8, ptr %address, i64 %ofs3
  store i8 0, ptr %adr3

  %innercmp = icmp sgt i32 %limitdec, %innerpostiv
  br i1 %innercmp, label %innerloop, label %innerexit

; Single-entry (lcssa-style) phi carrying the final inner IV value out of the
; loop. The CHECK lines expect it to become an i64 phi followed by a
; `trunc nsw` back to i32.
innerexit:
  %innercount.lcssa = phi i32 [ %innerpostiv, %innerloop ]
  br label %outermerge

; Eliminate %ofs4 after widening outercount
; TODO: Eliminate %ofs5 after removing lcssa
; (The CHECK lines still contain the sext for %ofs5, matching the TODO.)
outermerge:
  %innercount.merge = phi i32 [ %innercount.lcssa, %innerexit ], [ %innercount, %innerpreheader ]

  %ofs4 = sext i32 %outercount to i64
  %adr4 = getelementptr i8, ptr %address, i64 %ofs4
  store i8 0, ptr %adr4

  %ofs5 = sext i32 %innercount.merge to i64
  %adr5 = getelementptr i8, ptr %address, i64 %ofs5
  store i8 0, ptr %adr5

  %outerpostcount = add i32 %outercount, 1
  %tmp47 = icmp slt i32 %outerpostcount, %limit
  br i1 %tmp47, label %outerloop, label %return

return:
  ret void
}
