; RUN: opt -passes=loop-unroll-and-jam -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s
; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='loop-unroll-and-jam' -unroll-runtime -unroll-partial-threshold=60 < %s -S | FileCheck %s

; Purpose: exercise loop-unroll-and-jam when the request comes from explicit
; loop metadata attached to the outer-loop latch branch.
;   * @function  uses llvm.loop.unroll_and_jam.count (4)  -> fixed count.
;   * @function2 uses llvm.loop.unroll_and_jam.enable     -> count chosen by
;     the pass under the thresholds given on the command lines above.
;
; Both functions contain the same two-deep loop nest:
;   outer loop: %y.055.us counts 0 .. %height, advancing the source row
;               pointer by %src_stride and the destination row pointer by
;               %dst_stride each iteration;
;   inner loop: %indvars.iv counts 0 .. zext(%width); each iteration combines
;               two adjacent bytes of the current source row and two adjacent
;               bytes of the next source row as
;                   (a*%A + b*%B + c*%C + d*%D + 32) >> 6
;               and stores the truncated i8 result to the destination row.
;
; NOTE: the value names in the IR below are matched literally by the FileCheck
; patterns (the ".3" / ".1" suffixes are the names given to the last
; unrolled copy), so do not rename values or reorder instructions.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; CHECK-LABEL: function
; The explicit metadata here should force this to be unrolled and jammed 4 times (hence the %.pre60.3)
; CHECK: %.pre = phi i8 [ %.pre60.3, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader.new ]
; CHECK: %indvars.iv.3 = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next.3, %for.body4.us ]
define void @function(ptr noalias nocapture %dst, i32 %dst_stride, ptr noalias nocapture readonly %src, i32 %src_stride, i32 %A, i32 %B, i32 %C, i32 %D, i32 %width, i32 %height) {
entry:
  %idxprom = sext i32 %src_stride to i64
  ; Skip everything when there are no rows to process.
  %cmp52 = icmp sgt i32 %height, 0
  br i1 %cmp52, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup

for.cond1.preheader.lr.ph:                        ; preds = %entry
  ; Likewise skip when each row would be empty.
  %cmp249 = icmp sgt i32 %width, 0
  %idx.ext = sext i32 %dst_stride to i64
  br i1 %cmp249, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup

for.cond1.preheader.us.preheader:                 ; preds = %for.cond1.preheader.lr.ph
  ; First byte of the first source row, pre-loaded for the outer-loop phi.
  %.pre.pre = load i8, ptr %src, align 1
  %wide.trip.count = zext i32 %width to i64
  br label %for.cond1.preheader.us

; Outer-loop header: one iteration per output row.
for.cond1.preheader.us:                           ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.us.preheader
  ; %.pre           : first byte of the current source row.
  ; %srcp.056.us.pn : pointer to the current source row.
  ; %y.055.us       : row counter.
  ; %dst.addr.054.us: pointer to the current destination row.
  %.pre = phi i8 [ %.pre60, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader ]
  %srcp.056.us.pn = phi ptr [ %srcp.056.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %src, %for.cond1.preheader.us.preheader ]
  %y.055.us = phi i32 [ %inc30.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]
  %dst.addr.054.us = phi ptr [ %add.ptr.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %dst, %for.cond1.preheader.us.preheader ]
  ; Next source row = current row + src_stride; pre-load its first byte.
  %srcp.056.us = getelementptr inbounds i8, ptr %srcp.056.us.pn, i64 %idxprom
  %.pre60 = load i8, ptr %srcp.056.us, align 1
  br label %for.body4.us

; Inner loop: one iteration per output byte in the row.
for.body4.us:                                     ; preds = %for.body4.us, %for.cond1.preheader.us
  ; %0 / %1 carry the next-row / current-row byte loaded by the previous
  ; iteration, so each iteration performs only two new loads.
  %0 = phi i8 [ %.pre60, %for.cond1.preheader.us ], [ %3, %for.body4.us ]
  %1 = phi i8 [ %.pre, %for.cond1.preheader.us ], [ %2, %for.body4.us ]
  %indvars.iv = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next, %for.body4.us ]
  %conv.us = zext i8 %1 to i32
  %mul.us = mul nsw i32 %conv.us, %A
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %arrayidx8.us = getelementptr inbounds i8, ptr %srcp.056.us.pn, i64 %indvars.iv.next
  %2 = load i8, ptr %arrayidx8.us, align 1
  %conv9.us = zext i8 %2 to i32
  %mul10.us = mul nsw i32 %conv9.us, %B
  %conv14.us = zext i8 %0 to i32
  %mul15.us = mul nsw i32 %conv14.us, %C
  %arrayidx19.us = getelementptr inbounds i8, ptr %srcp.056.us, i64 %indvars.iv.next
  %3 = load i8, ptr %arrayidx19.us, align 1
  %conv20.us = zext i8 %3 to i32
  %mul21.us = mul nsw i32 %conv20.us, %D
  ; Sum the four weighted samples plus 32, then shift right by 6.
  %add11.us = add i32 %mul.us, 32
  %add16.us = add i32 %add11.us, %mul10.us
  %add22.us = add i32 %add16.us, %mul15.us
  %add23.us = add i32 %add22.us, %mul21.us
  %4 = lshr i32 %add23.us, 6
  %conv24.us = trunc i32 %4 to i8
  %arrayidx26.us = getelementptr inbounds i8, ptr %dst.addr.054.us, i64 %indvars.iv
  store i8 %conv24.us, ptr %arrayidx26.us, align 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us

; Outer-loop latch: advance the destination row and the row counter.
for.cond1.for.cond.cleanup3_crit_edge.us:         ; preds = %for.body4.us
  %add.ptr.us = getelementptr inbounds i8, ptr %dst.addr.054.us, i64 %idx.ext
  %inc30.us = add nuw nsw i32 %y.055.us, 1
  %exitcond58 = icmp eq i32 %inc30.us, %height
  ; !5 attaches llvm.loop.unroll_and_jam.count = 4 to the outer loop.
  br i1 %exitcond58, label %for.cond.cleanup, label %for.cond1.preheader.us, !llvm.loop !5

for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.lr.ph, %entry
  ret void
}

; CHECK-LABEL: function2
; The explicit metadata here should force this to be unrolled and jammed, but
; the count is left to thresholds. In this case 2 (hence %.pre60.1).
; CHECK: %.pre = phi i8 [ %.pre60.1, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader.new ]
; CHECK: %indvars.iv.1 = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next.1, %for.body4.us ]
define void @function2(ptr noalias nocapture %dst, i32 %dst_stride, ptr noalias nocapture readonly %src, i32 %src_stride, i32 %A, i32 %B, i32 %C, i32 %D, i32 %width, i32 %height) {
entry:
  %idxprom = sext i32 %src_stride to i64
  ; Skip everything when there are no rows to process.
  %cmp52 = icmp sgt i32 %height, 0
  br i1 %cmp52, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup

for.cond1.preheader.lr.ph:                        ; preds = %entry
  ; Likewise skip when each row would be empty.
  %cmp249 = icmp sgt i32 %width, 0
  %idx.ext = sext i32 %dst_stride to i64
  br i1 %cmp249, label %for.cond1.preheader.us.preheader, label %for.cond.cleanup

for.cond1.preheader.us.preheader:                 ; preds = %for.cond1.preheader.lr.ph
  ; First byte of the first source row, pre-loaded for the outer-loop phi.
  %.pre.pre = load i8, ptr %src, align 1
  %wide.trip.count = zext i32 %width to i64
  br label %for.cond1.preheader.us

; Outer-loop header: one iteration per output row.
for.cond1.preheader.us:                           ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.us.preheader
  %.pre = phi i8 [ %.pre60, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %.pre.pre, %for.cond1.preheader.us.preheader ]
  %srcp.056.us.pn = phi ptr [ %srcp.056.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %src, %for.cond1.preheader.us.preheader ]
  %y.055.us = phi i32 [ %inc30.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %for.cond1.preheader.us.preheader ]
  %dst.addr.054.us = phi ptr [ %add.ptr.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ %dst, %for.cond1.preheader.us.preheader ]
  ; Next source row = current row + src_stride; pre-load its first byte.
  %srcp.056.us = getelementptr inbounds i8, ptr %srcp.056.us.pn, i64 %idxprom
  %.pre60 = load i8, ptr %srcp.056.us, align 1
  br label %for.body4.us

; Inner loop: one iteration per output byte in the row.
for.body4.us:                                     ; preds = %for.body4.us, %for.cond1.preheader.us
  ; %0 / %1 carry the next-row / current-row byte loaded by the previous
  ; iteration, so each iteration performs only two new loads.
  %0 = phi i8 [ %.pre60, %for.cond1.preheader.us ], [ %3, %for.body4.us ]
  %1 = phi i8 [ %.pre, %for.cond1.preheader.us ], [ %2, %for.body4.us ]
  %indvars.iv = phi i64 [ 0, %for.cond1.preheader.us ], [ %indvars.iv.next, %for.body4.us ]
  %conv.us = zext i8 %1 to i32
  %mul.us = mul nsw i32 %conv.us, %A
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %arrayidx8.us = getelementptr inbounds i8, ptr %srcp.056.us.pn, i64 %indvars.iv.next
  %2 = load i8, ptr %arrayidx8.us, align 1
  %conv9.us = zext i8 %2 to i32
  %mul10.us = mul nsw i32 %conv9.us, %B
  %conv14.us = zext i8 %0 to i32
  %mul15.us = mul nsw i32 %conv14.us, %C
  %arrayidx19.us = getelementptr inbounds i8, ptr %srcp.056.us, i64 %indvars.iv.next
  %3 = load i8, ptr %arrayidx19.us, align 1
  %conv20.us = zext i8 %3 to i32
  %mul21.us = mul nsw i32 %conv20.us, %D
  ; Sum the four weighted samples plus 32, then shift right by 6.
  %add11.us = add i32 %mul.us, 32
  %add16.us = add i32 %add11.us, %mul10.us
  %add22.us = add i32 %add16.us, %mul15.us
  %add23.us = add i32 %add22.us, %mul21.us
  %4 = lshr i32 %add23.us, 6
  %conv24.us = trunc i32 %4 to i8
  %arrayidx26.us = getelementptr inbounds i8, ptr %dst.addr.054.us, i64 %indvars.iv
  store i8 %conv24.us, ptr %arrayidx26.us, align 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us

; Outer-loop latch: advance the destination row and the row counter.
for.cond1.for.cond.cleanup3_crit_edge.us:         ; preds = %for.body4.us
  %add.ptr.us = getelementptr inbounds i8, ptr %dst.addr.054.us, i64 %idx.ext
  %inc30.us = add nuw nsw i32 %y.055.us, 1
  %exitcond58 = icmp eq i32 %inc30.us, %height
  ; !7 attaches llvm.loop.unroll_and_jam.enable (no explicit count) to the outer loop.
  br i1 %exitcond58, label %for.cond.cleanup, label %for.cond1.preheader.us, !llvm.loop !7

for.cond.cleanup:                                 ; preds = %for.cond1.for.cond.cleanup3_crit_edge.us, %for.cond1.preheader.lr.ph, %entry
  ret void
}

; Loop metadata referenced from the outer-loop latch branches above.
; !5/!6: @function  - explicit unroll-and-jam count of 4.
; !7/!8: @function2 - unroll-and-jam enabled, count not specified.
!5 = distinct !{!5, !6}
!6 = !{!"llvm.loop.unroll_and_jam.count", i32 4}
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.unroll_and_jam.enable"}