; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S -enable-new-pm=0 | FileCheck %s
; RUN: opt < %s -passes='require<loops>,instcombine' -S | FileCheck %s
;
; Regression tests for how instcombine rewrites chains of getelementptr
; instructions that sit in, or feed into, loops. The same assertions are
; verified twice: once through the legacy pass manager and once through the
; new pass manager with the loop analysis requested ahead of instcombine.
; The assertion lines are script-generated (see the note at the top of the
; file); regenerate them with that script instead of editing them by hand.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; A search loop that repeatedly loads a 32-bit value at
; %win + <index> + %best_len - 1 and compares it with %scan_end.
; In do.body the input applies the loop-varying index first and the offsets
; computed outside the loop (%idx.ext1, -1) afterwards. The expected output
; swaps that order: %win is first advanced by %idx.ext1 and -1, and the
; loop-varying index is applied last. The single-entry phis in do.body are
; also expected to disappear, with their incoming values used directly.
; NOTE(review): the reordering presumably exists so the invariant part of the
; address can later be hoisted out of the loop - confirm against the
; instcombine GEP-swapping code.
define i32 @foo(i8* nocapture readnone %match, i32 %cur_match, i32 %best_len, i32 %scan_end, i32* nocapture readonly %prev, i32 %limit, i32 %chain_length, i8* nocapture readonly %win, i32 %wmask) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_EXT2:%.*]] = zext i32 [[CUR_MATCH:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR4:%.*]] = getelementptr inbounds i8, i8* [[WIN:%.*]], i64 [[IDX_EXT2]]
; CHECK-NEXT:    [[IDX_EXT1:%.*]] = zext i32 [[BEST_LEN:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR25:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR4]], i64 [[IDX_EXT1]]
; CHECK-NEXT:    [[ADD_PTR36:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR25]], i64 -1
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[ADD_PTR36]] to i32*
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
; CHECK-NEXT:    [[CMP7:%.*]] = icmp eq i32 [[TMP1]], [[SCAN_END:%.*]]
; CHECK-NEXT:    br i1 [[CMP7]], label [[DO_END:%.*]], label [[IF_THEN_LR_PH:%.*]]
; CHECK:       if.then.lr.ph:
; CHECK-NEXT:    br label [[IF_THEN:%.*]]
; CHECK:       do.body:
; CHECK-NEXT:    [[IDX_EXT:%.*]] = zext i32 [[TMP4:%.*]] to i64
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[WIN]], i64 [[IDX_EXT1]]
; CHECK-NEXT:    [[ADD_PTR2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 -1
; CHECK-NEXT:    [[ADD_PTR3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR2]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[ADD_PTR3]] to i32*
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP3]], [[SCAN_END]]
; CHECK-NEXT:    br i1 [[CMP]], label [[DO_END]], label [[IF_THEN]]
; CHECK:       if.then:
; CHECK-NEXT:    [[CUR_MATCH_ADDR_09:%.*]] = phi i32 [ [[CUR_MATCH]], [[IF_THEN_LR_PH]] ], [ [[TMP4]], [[DO_BODY:%.*]] ]
; CHECK-NEXT:    [[CHAIN_LENGTH_ADDR_08:%.*]] = phi i32 [ [[CHAIN_LENGTH:%.*]], [[IF_THEN_LR_PH]] ], [ [[DEC:%.*]], [[DO_BODY]] ]
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[CUR_MATCH_ADDR_09]], [[WMASK:%.*]]
; CHECK-NEXT:    [[IDXPROM:%.*]] = zext i32 [[AND]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREV:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[TMP4]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP4:%.*]] = icmp ugt i32 [[TMP4]], [[LIMIT:%.*]]
; CHECK-NEXT:    br i1 [[CMP4]], label [[LAND_LHS_TRUE:%.*]], label [[DO_END]]
; CHECK:       land.lhs.true:
; CHECK-NEXT:    [[DEC]] = add i32 [[CHAIN_LENGTH_ADDR_08]], -1
; CHECK-NEXT:    [[CMP5:%.*]] = icmp eq i32 [[DEC]], 0
; CHECK-NEXT:    br i1 [[CMP5]], label [[DO_END]], label [[DO_BODY]]
; CHECK:       do.end:
; CHECK-NEXT:    [[CONT_0:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 0, [[IF_THEN]] ], [ 0, [[LAND_LHS_TRUE]] ], [ 1, [[DO_BODY]] ]
; CHECK-NEXT:    ret i32 [[CONT_0]]
;
entry:
  ; First probe, done before entering the loop, using the initial %cur_match.
  %idx.ext2 = zext i32 %cur_match to i64
  %add.ptr4 = getelementptr inbounds i8, i8* %win, i64 %idx.ext2
  %idx.ext1 = zext i32 %best_len to i64
  %add.ptr25 = getelementptr inbounds i8, i8* %add.ptr4, i64 %idx.ext1
  %add.ptr36 = getelementptr inbounds i8, i8* %add.ptr25, i64 -1
  %0 = bitcast i8* %add.ptr36 to i32*
  %1 = load i32, i32* %0, align 4
  %cmp7 = icmp eq i32 %1, %scan_end
  br i1 %cmp7, label %do.end, label %if.then.lr.ph

if.then.lr.ph:                                    ; preds = %entry
  br label %if.then

do.body:                                          ; preds = %land.lhs.true
  ; Both phis have a single incoming edge, so they are plain copies of %dec
  ; and %4.
  %chain_length.addr.0 = phi i32 [ %dec, %land.lhs.true ]
  %cur_match.addr.0 = phi i32 [ %4, %land.lhs.true ]
  ; The GEP chain under test: loop-varying index, then %idx.ext1, then -1.
  %idx.ext = zext i32 %cur_match.addr.0 to i64
  %add.ptr = getelementptr inbounds i8, i8* %win, i64 %idx.ext
  %add.ptr2 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext1
  %add.ptr3 = getelementptr inbounds i8, i8* %add.ptr2, i64 -1
  %2 = bitcast i8* %add.ptr3 to i32*
  %3 = load i32, i32* %2, align 4
  %cmp = icmp eq i32 %3, %scan_end
  br i1 %cmp, label %do.end, label %if.then

if.then:                                          ; preds = %if.then.lr.ph, %do.body
  %cur_match.addr.09 = phi i32 [ %cur_match, %if.then.lr.ph ], [ %cur_match.addr.0, %do.body ]
  %chain_length.addr.08 = phi i32 [ %chain_length, %if.then.lr.ph ], [ %chain_length.addr.0, %do.body ]
  ; Load the next candidate index from %prev at (current index & %wmask).
  %and = and i32 %cur_match.addr.09, %wmask
  %idxprom = zext i32 %and to i64
  %arrayidx = getelementptr inbounds i32, i32* %prev, i64 %idxprom
  %4 = load i32, i32* %arrayidx, align 4
  %cmp4 = icmp ugt i32 %4, %limit
  br i1 %cmp4, label %land.lhs.true, label %do.end

land.lhs.true:                                    ; preds = %if.then
  ; Count down the remaining chain length; leave the loop when it reaches 0.
  %dec = add i32 %chain_length.addr.08, -1
  %cmp5 = icmp eq i32 %dec, 0
  br i1 %cmp5, label %do.end, label %do.body

do.end:                                           ; preds = %do.body, %land.lhs.true, %if.then, %entry
  ; Result is 1 when a probe matched %scan_end (from entry or do.body),
  ; otherwise 0.
  %cont.0 = phi i32 [ 1, %entry ], [ 0, %if.then ], [ 0, %land.lhs.true ], [ 1, %do.body ]
  ret i32 %cont.0
}

; Opaque sink that keeps the vector-of-pointers results below alive.
declare void @blackhole(<2 x i8*>)

; Scalar base pointer, vector variable index, scalar constant offset.
; The input applies the vector index to %base first and adds 80 afterwards.
; The expected output adds the constant 80 to the scalar %base first and then
; applies the vector index. It also expects the two leading GEPs on %in to
; merge into one, and the lshr-by-21 / shl-by-7 pair to become an lshr by 14
; followed by a mask that clears the low 7 bits.
; NOTE(review): the name presumably refers to bug report PR37005 - confirm.
define void @PR37005(i8* %base, i8** %in) {
; CHECK-LABEL: @PR37005(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
; CHECK-NEXT:    [[E4:%.*]] = getelementptr inbounds i8*, i8** [[E2]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT:    [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64>
; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], <i64 14, i64 14>
; CHECK-NEXT:    [[SL1:%.*]] = and <2 x i64> [[TMP0]], <i64 1125899906842496, i64 1125899906842496>
; CHECK-NEXT:    [[E51:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 80
; CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds i8, i8* [[E51]], <2 x i64> [[SL1]]
; CHECK-NEXT:    call void @blackhole(<2 x i8*> [[E6]])
; CHECK-NEXT:    br label [[LOOP]]
;
entry:
  br label %loop

loop:
  %e1 = getelementptr inbounds i8*, i8** %in, i64 undef
  %e2 = getelementptr inbounds i8*, i8** %e1, i64 6
  %bc1 = bitcast i8** %e2 to <2 x i8*>*
  %e3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %bc1, i64 0, i64 0
  %e4 = getelementptr inbounds i8*, i8** %e3, <2 x i64> <i64 0, i64 1>
  %pi1 = ptrtoint <2 x i8**> %e4 to <2 x i64>
  %lr1 = lshr <2 x i64> %pi1, <i64 21, i64 21>
  %sl1 = shl nuw nsw <2 x i64> %lr1, <i64 7, i64 7>
  ; Scalar base + vector index yields a vector of pointers.
  %e5 = getelementptr inbounds i8, i8* %base, <2 x i64> %sl1
  %e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80
  call void @blackhole(<2 x i8*> %e6)
  br label %loop
}

; Variant with a scalar variable index and a vector of constant offsets.
; The input applies the scalar index to %base first and the constant vector
; <80, 60> afterwards. The expected output applies the constant vector to
; %base first and the scalar variable index last.
define void @PR37005_2(i8* %base, i8** %in) {
; CHECK-LABEL: @PR37005_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
; CHECK-NEXT:    [[PI1:%.*]] = ptrtoint i8** [[E2]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[PI1]], 14
; CHECK-NEXT:    [[SL1:%.*]] = and i64 [[TMP0]], 1125899906842496
; CHECK-NEXT:    [[E51:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], <2 x i64> <i64 80, i64 60>
; CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E51]], i64 [[SL1]]
; CHECK-NEXT:    call void @blackhole(<2 x i8*> [[E6]])
; CHECK-NEXT:    br label [[LOOP]]
;
entry:
  br label %loop

loop:
  %e1 = getelementptr inbounds i8*, i8** %in, i64 undef
  %e2 = getelementptr inbounds i8*, i8** %e1, i64 6
  %pi1 = ptrtoint i8** %e2 to i64
  %lr1 = lshr i64 %pi1, 21
  %sl1 = shl nuw nsw i64 %lr1, 7
  %e5 = getelementptr inbounds i8, i8* %base, i64 %sl1
  ; Scalar pointer + constant vector index yields a vector of pointers.
  %e6 = getelementptr inbounds i8, i8* %e5, <2 x i64> <i64 80, i64 60>
  call void @blackhole(<2 x i8*> %e6)
  br label %loop
}

; Variant where %base is already a vector of pointers. The body otherwise
; matches @PR37005, and the expected output again applies the constant 80
; before the variable vector index.
define void @PR37005_3(<2 x i8*> %base, i8** %in) {
; CHECK-LABEL: @PR37005_3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[E2:%.*]] = getelementptr inbounds i8*, i8** [[IN:%.*]], i64 undef
; CHECK-NEXT:    [[E4:%.*]] = getelementptr inbounds i8*, i8** [[E2]], <2 x i64> <i64 0, i64 1>
; CHECK-NEXT:    [[PI1:%.*]] = ptrtoint <2 x i8**> [[E4]] to <2 x i64>
; CHECK-NEXT:    [[TMP0:%.*]] = lshr <2 x i64> [[PI1]], <i64 14, i64 14>
; CHECK-NEXT:    [[SL1:%.*]] = and <2 x i64> [[TMP0]], <i64 1125899906842496, i64 1125899906842496>
; CHECK-NEXT:    [[E5:%.*]] = getelementptr inbounds i8, <2 x i8*> [[BASE:%.*]], i64 80
; CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds i8, <2 x i8*> [[E5]], <2 x i64> [[SL1]]
; CHECK-NEXT:    call void @blackhole(<2 x i8*> [[E6]])
; CHECK-NEXT:    br label [[LOOP]]
;
entry:
  br label %loop

loop:
  %e1 = getelementptr inbounds i8*, i8** %in, i64 undef
  %e2 = getelementptr inbounds i8*, i8** %e1, i64 6
  %bc1 = bitcast i8** %e2 to <2 x i8*>*
  %e3 = getelementptr inbounds <2 x i8*>, <2 x i8*>* %bc1, i64 0, i64 0
  %e4 = getelementptr inbounds i8*, i8** %e3, <2 x i64> <i64 0, i64 1>
  %pi1 = ptrtoint <2 x i8**> %e4 to <2 x i64>
  %lr1 = lshr <2 x i64> %pi1, <i64 21, i64 21>
  %sl1 = shl nuw nsw <2 x i64> %lr1, <i64 7, i64 7>
  %e5 = getelementptr inbounds i8, <2 x i8*> %base, <2 x i64> %sl1
  %e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80
  call void @blackhole(<2 x i8*> %e6)
  br label %loop
}

; This would crash because we did not expect to be able to constant fold a GEP.
; Here the base pointer is a constant expression (the address of this function
; cast to i8*), so adding the constant offset 80 to it produces another
; constant expression, not a new instruction. The expected output is that
; folded constant used directly as the base of the vector-indexed GEP.

define void @PR51485(<2 x i64> %v) {
; CHECK-LABEL: @PR51485(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SL1:%.*]] = shl nuw nsw <2 x i64> [[V:%.*]], <i64 7, i64 7>
; CHECK-NEXT:    [[E6:%.*]] = getelementptr inbounds i8, i8* getelementptr inbounds (i8, i8* bitcast (void (<2 x i64>)* @PR51485 to i8*), i64 80), <2 x i64> [[SL1]]
; CHECK-NEXT:    call void @blackhole(<2 x i8*> [[E6]])
; CHECK-NEXT:    br label [[LOOP]]
;
entry:
  br label %loop

loop:
  %sl1 = shl nuw nsw <2 x i64> %v, <i64 7, i64 7>
  %e5 = getelementptr inbounds i8, i8* bitcast (void (<2 x i64>)* @PR51485 to i8*), <2 x i64> %sl1
  %e6 = getelementptr inbounds i8, <2 x i8*> %e5, i64 80
  call void @blackhole(<2 x i8*> %e6)
  br label %loop
}

; Avoid folding the GEP outside the loop to inside, and increasing loop
; instruction count.
; %add.ptr is computed once in the entry block and only indexed by %idx inside
; the loop. The expected output keeps it in the entry block and keeps a single
; GEP in for.body.i. The only other expected changes are the canonicalised
; loop compare (ule 16 becomes ult 17) and an added nuw flag on the induction
; increment. Note that the opening brace of the function follows the assertion
; block, not the define line.
define float @gep_cross_loop(i64* %_arg_, float* %_arg_3, float %_arg_8)
; CHECK-LABEL: @gep_cross_loop(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* [[_ARG_:%.*]], align 8
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds float, float* [[_ARG_3:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    br label [[FOR_COND_I:%.*]]
; CHECK:       for.cond.i:
; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD11_I:%.*]], [[FOR_BODY_I:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD_I:%.*]], [[FOR_BODY_I]] ]
; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[IDX]], 17
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_I]], label [[FOR_COND_I_I_I_PREHEADER:%.*]]
; CHECK:       for.cond.i.i.i.preheader:
; CHECK-NEXT:    ret float [[SUM]]
; CHECK:       for.body.i:
; CHECK-NEXT:    [[ARRAYIDX_I84_I:%.*]] = getelementptr inbounds float, float* [[ADD_PTR]], i64 [[IDX]]
; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[ARRAYIDX_I84_I]], align 4
; CHECK-NEXT:    [[ADD_I]] = fadd fast float [[SUM]], [[TMP1]]
; CHECK-NEXT:    [[ADD11_I]] = add nuw nsw i64 [[IDX]], 1
; CHECK-NEXT:    br label [[FOR_COND_I]]
;
{
entry:
  ; Base address computed outside the loop from a loaded element offset.
  %0 = load i64, i64* %_arg_, align 8
  %add.ptr = getelementptr inbounds float, float* %_arg_3, i64 %0
  br label %for.cond.i

for.cond.i:                                       ; preds = %for.body.i, %entry
  %idx = phi i64 [ 0, %entry ], [ %add11.i, %for.body.i ]
  %sum = phi float [ 0.000000e+00, %entry ], [ %add.i, %for.body.i ]
  %cmp = icmp ule i64 %idx, 16
  br i1 %cmp, label %for.body.i, label %for.cond.i.i.i.preheader

for.cond.i.i.i.preheader:                         ; preds = %for.cond.i
  ret float %sum

for.body.i:                                       ; preds = %for.cond.i
  ; In-loop GEP on top of the out-of-loop %add.ptr; the two must stay separate.
  %arrayidx.i84.i = getelementptr inbounds float, float * %add.ptr, i64 %idx
  %1 = load float, float* %arrayidx.i84.i, align 4
  %add.i = fadd fast float %sum, %1
  %add11.i = add nsw i64 %idx, 1
  br label %for.cond.i
}