; REQUIRES: asserts
; RUN: opt < %s -mcpu=neoverse-v2 -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s

target triple="aarch64--linux-gnu"

; This test shows that comparison and next iteration IV have zero cost if the
; vector loop gets executed exactly once with the given VF.
;
; The scalar loop runs 16 iterations (%i.iv starts at 0 and the loop exits once
; %i.iv.next equals 16), so a vector loop with VF 16 executes exactly once.
; Each iteration loads one byte from %a and one from %b, zero-extends both to
; i64, divides the %b element by the %a element and accumulates the quotient.
; Expectations below: for VF 8 the IV increment and the exit compare are each
; reported with cost 1; for VF 16 no cost line for either is expected between
; the VF 8 total and the IV phi line.
define i64 @test(ptr %a, ptr %b) #0 {
; CHECK-LABEL: LV: Checking a loop in 'test'
; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 30
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 16: 56
; CHECK: LV: Selecting VF: 16
entry:
  br label %for.body

exit:                                             ; preds = %for.body
  ret i64 %add

for.body:                                         ; preds = %entry, %for.body
  %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv
  %0 = load i8, ptr %arrayidx, align 1
  %conv = zext i8 %0 to i64
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %i.iv
  %1 = load i8, ptr %arrayidx2, align 1
  %conv3 = zext i8 %1 to i64
  %div = udiv i64 %conv3, %conv                   ; b[i] / a[i]
  %add = add i64 %div, %sum                       ; running sum, returned from %exit
  %i.iv.next = add nuw nsw i64 %i.iv, 1           ; only used by the IV phi and the exit compare
  %exitcond.not = icmp eq i64 %i.iv.next, 16
  br i1 %exitcond.not, label %exit, label %for.body
}

; Same as above, but the next-iteration IV has an extra user, and thus its cost is not zero.
define i64 @test_external_iv_user(ptr %a, ptr %b) #0 {
; 16-iteration loop (exits once %i.iv.next equals 16) that accumulates
; b[i + 1] / a[i]. Unlike a loop whose IV increment only feeds the IV phi and
; the exit compare, %i.iv.next here is also the index of the %b access, so it
; has an additional user. The expectations below therefore still list the IV
; increment with cost 1 for VF 16; no exit-condition cost line is expected
; between the VF 8 total and the VF 16 canonical-induction line.
; CHECK-LABEL: LV: Checking a loop in 'test_external_iv_user'
; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 30
; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 16: 57
; CHECK: LV: Selecting VF: vscale x 2
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds nuw i8, ptr %a, i64 %i.iv
  %0 = load i8, ptr %arrayidx, align 1
  %conv = zext i8 %0 to i64
  %i.iv.next = add nuw nsw i64 %i.iv, 1
  %arrayidx2 = getelementptr inbounds nuw i8, ptr %b, i64 %i.iv.next ; extra user of %i.iv.next
  %1 = load i8, ptr %arrayidx2, align 1
  %conv3 = zext i8 %1 to i64
  %div = udiv i64 %conv3, %conv                   ; b[i + 1] / a[i]
  %add = add i64 %sum, %div                       ; running sum, returned from %exit
  %exitcond.not = icmp eq i64 %i.iv.next, 16
  br i1 %exitcond.not, label %exit, label %for.body

exit:                                             ; preds = %for.body
  ret i64 %add
}

; Same as above, but with two IVs that have no extra users. Both have zero cost when VF equals the number of iterations.
define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
; 16-iteration loop (exits once %i.iv.next equals 16) with two induction
; variables: %i.iv counts from 0 and indexes %a, %j.iv counts from %start and
; indexes %b. Each iteration accumulates a[i] * b[j]. Neither increment has a
; user other than its own phi (and, for %i.iv.next, the exit compare). The
; expectations below list both increments and the compare with cost 1 for
; VF 8, and expect no cost line for any of them between the VF 8 total and the
; VF 16 phi lines.
; CHECK-LABEL: LV: Checking a loop in 'test_two_ivs'
; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
; CHECK-NEXT: Cost of 1 for VF 8: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 24
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 16: 42
; CHECK: LV: Selecting VF: 16
entry:
  br label %for.body

exit:                                             ; preds = %for.body
  ret i64 %add

for.body:                                         ; preds = %entry, %for.body
  %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
  %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
  %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv
  %0 = load i8, ptr %arrayidx, align 1
  %conv = zext i8 %0 to i64
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %j.iv
  %1 = load i8, ptr %arrayidx2, align 1
  %conv3 = zext i8 %1 to i64
  %mul = mul nuw nsw i64 %conv3, %conv            ; b[j] * a[i]
  %add = add i64 %mul, %sum                       ; running sum, returned from %exit
  %i.iv.next = add nuw nsw i64 %i.iv, 1
  %j.iv.next = add nuw nsw i64 %j.iv, 1
  %exitcond.not = icmp eq i64 %i.iv.next, 16      ; only %i.iv.next controls the exit
  br i1 %exitcond.not, label %exit, label %for.body
}

; The exit compare has an extra user outside the loop: its value is returned
; from the exit block. The loop runs 16 iterations and adds src[i] into dst[i]
; in place. Note that this function is not tagged with attribute group #0.
; The expectations below list the IV increment and the compare with cost 4
; for VF 8, and expect no cost line for either between the VF 8 total and the
; VF 16 phi line.
define i1 @test_extra_cmp_user(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src) {
; CHECK-LABEL: LV: Checking a loop in 'test_extra_cmp_user'
; CHECK: Cost of 4 for VF 8: induction instruction %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK-NEXT: Cost of 4 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %indvars.iv.next, 16
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 8: 12
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
; CHECK: Cost for VF 16: 4
; CHECK: LV: Selecting VF: 16
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds nuw i8, ptr %src, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds nuw i8, ptr %dst, i64 %indvars.iv
  %1 = load i8, ptr %arrayidx2, align 4
  %add = add nsw i8 %1, %0                        ; dst[i] + src[i]
  store i8 %add, ptr %arrayidx2, align 4          ; written back to dst[i]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 16
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret i1 %exitcond.not                            ; extra user of the exit compare
}

; Attribute group shared by the functions tagged #0 in this file.
attributes #0 = { vscale_range(1, 16) "target-features"="+sve" }