; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -S -passes=loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-count=2 | FileCheck %s

; Make sure the loop is unrolled without a remainder loop based on an assumption
; that the least significant bit is known to be zero.
;
; The loop bound is %n, the signed maximum of %p and %q:
;   * %p is even on every path into the loop, because entry only branches to
;     %guarded when (%p & 1) == 0;
;   * %q is even because of the llvm.assume on (%q urem 2) == 0.
; The expected output below therefore has the preheader branching straight
; into the unrolled-by-2 body, with no extra-iteration computation and no
; epilogue block.

define dso_local void @assumeDivisibleTC(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %p, i32 %q) local_unnamed_addr {
; CHECK-LABEL: @assumeDivisibleTC(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P:%.*]], 1
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[GUARDED:%.*]], label [[EXIT:%.*]]
; CHECK:       guarded:
; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[Q:%.*]], 2
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[REM]], 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP2]])
; CHECK-NEXT:    [[GT:%.*]] = icmp sgt i32 [[P]], [[Q]]
; CHECK-NEXT:    [[N:%.*]] = select i1 [[GT]], i32 [[P]], i32 [[Q]]
; CHECK-NEXT:    [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP0]], 3
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[I_011]]
; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX4]], align 1
; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[I_011]], 1
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[INC]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT:    [[ADD_1:%.*]] = add i8 [[TMP1]], 3
; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
; CHECK-NEXT:    store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[I_011]], 2
; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp slt i32 [[INC_1]], [[N]]
; CHECK-NEXT:    br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       exit.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  ; Only continue when the low bit of %p is clear.
  %and = and i32 %p, 1
  %cmp = icmp eq i32 %and, 0
  br i1 %cmp, label %guarded, label %exit

guarded:
  ; Tell the optimizer that %q is a multiple of 2.
  %rem = urem i32 %q, 2
  %cmp2 = icmp eq i32 %rem, 0
  tail call void @llvm.assume(i1 %cmp2)
  ; %n = signed max(%p, %q); the loop is entered only when %n > 0.
  %gt = icmp sgt i32 %p, %q
  %n = select i1 %gt, i32 %p, i32 %q
  %cmp110 = icmp sgt i32 %n, 0
  br i1 %cmp110, label %for.body, label %exit

for.body:
  ; a[i] = b[i] + 3 for i = 0 .. %n - 1
  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %guarded ]
  %arrayidx = getelementptr inbounds i8, ptr %b, i32 %i.011
  %0 = load i8, ptr %arrayidx, align 1
  %add = add i8 %0, 3
  %arrayidx4 = getelementptr inbounds i8, ptr %a, i32 %i.011
  store i8 %add, ptr %arrayidx4, align 1
  %inc = add nuw nsw i32 %i.011, 1
  %cmp1 = icmp slt i32 %inc, %n
  br i1 %cmp1, label %for.body, label %exit

exit:
  ret void
}

; Make sure the loop is unrolled with a remainder loop when the trip-count
; is not provably divisible by the unroll factor.
; Same loop as the divisible case, but the entry guard masks %p with 6. That
; tests bits 1 and 2 only and leaves bit 0 unconstrained, so %p (and hence
; the loop bound %n, the signed maximum of %p and %q) may be odd even though
; %q is assumed even. The expected output therefore computes the number of
; extra iterations as (%n & 1) and contains a single-iteration epilogue block.

define dso_local void @cannotProveDivisibleTC(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i32 %p, i32 %q) local_unnamed_addr {
; CHECK-LABEL: @cannotProveDivisibleTC(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P:%.*]], 6
; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT:    br i1 [[CMP]], label [[GUARDED:%.*]], label [[EXIT:%.*]]
; CHECK:       guarded:
; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[Q:%.*]], 2
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[REM]], 0
; CHECK-NEXT:    tail call void @llvm.assume(i1 [[CMP2]])
; CHECK-NEXT:    [[GT:%.*]] = icmp sgt i32 [[P]], [[Q]]
; CHECK-NEXT:    [[N:%.*]] = select i1 [[GT]], i32 [[P]], i32 [[Q]]
; CHECK-NEXT:    [[CMP110:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP110]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 1
; CHECK-NEXT:    br i1 [[TMP1]], label [[EXIT_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
; CHECK:       for.body.preheader.new:
; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_011:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_1:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i32 [[I_011]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP2]], 3
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i32 [[I_011]]
; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX4]], align 1
; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i32 [[I_011]], 1
; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[INC]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
; CHECK-NEXT:    [[ADD_1:%.*]] = add i8 [[TMP3]], 3
; CHECK-NEXT:    [[ARRAYIDX4_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INC]]
; CHECK-NEXT:    store i8 [[ADD_1]], ptr [[ARRAYIDX4_1]], align 1
; CHECK-NEXT:    [[INC_1]] = add nuw nsw i32 [[I_011]], 2
; CHECK-NEXT:    [[NITER_NEXT_1]] = add i32 [[NITER]], 2
; CHECK-NEXT:    [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; CHECK-NEXT:    br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       exit.loopexit.unr-lcssa.loopexit:
; CHECK-NEXT:    [[I_011_UNR_PH:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ]
; CHECK-NEXT:    br label [[EXIT_LOOPEXIT_UNR_LCSSA]]
; CHECK:       exit.loopexit.unr-lcssa:
; CHECK-NEXT:    [[I_011_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[I_011_UNR_PH]], [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ]
; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
; CHECK-NEXT:    br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK:       for.body.epil.preheader:
; CHECK-NEXT:    br label [[FOR_BODY_EPIL:%.*]]
; CHECK:       for.body.epil:
; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[I_011_UNR]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_EPIL]], align 1
; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add i8 [[TMP4]], 3
; CHECK-NEXT:    [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[I_011_UNR]]
; CHECK-NEXT:    store i8 [[ADD_EPIL]], ptr [[ARRAYIDX4_EPIL]], align 1
; CHECK-NEXT:    br label [[EXIT_LOOPEXIT]]
; CHECK:       exit.loopexit:
; CHECK-NEXT:    br label [[EXIT]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  ; Mask 6 inspects bits 1 and 2 only; bit 0 of %p stays unknown.
  %and = and i32 %p, 6
  %cmp = icmp eq i32 %and, 0
  br i1 %cmp, label %guarded, label %exit

guarded:
  ; %q is still assumed to be a multiple of 2.
  %rem = urem i32 %q, 2
  %cmp2 = icmp eq i32 %rem, 0
  tail call void @llvm.assume(i1 %cmp2)
  ; %n = signed max(%p, %q); the loop is entered only when %n > 0.
  %gt = icmp sgt i32 %p, %q
  %n = select i1 %gt, i32 %p, i32 %q
  %cmp110 = icmp sgt i32 %n, 0
  br i1 %cmp110, label %for.body, label %exit

for.body:
  ; a[i] = b[i] + 3 for i = 0 .. %n - 1
  %i.011 = phi i32 [ %inc, %for.body ], [ 0, %guarded ]
  %arrayidx = getelementptr inbounds i8, ptr %b, i32 %i.011
  %0 = load i8, ptr %arrayidx, align 1
  %add = add i8 %0, 3
  %arrayidx4 = getelementptr inbounds i8, ptr %a, i32 %i.011
  store i8 %add, ptr %arrayidx4, align 1
  %inc = add nuw nsw i32 %i.011, 1
  %cmp1 = icmp slt i32 %inc, %n
  br i1 %cmp1, label %for.body, label %exit

exit:
  ret void
}

declare void @llvm.assume(i1 noundef) nofree nosync nounwind willreturn