xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/fully-unrolled-cost.ll (revision 795e35a653b977bf637d1d049423adc8a63cd20d)
1; REQUIRES: asserts
2; RUN: opt < %s -mcpu=neoverse-v2 -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
3
; Module-wide target triple; it applies to every function defined below.
4target triple="aarch64--linux-gnu"
5
6; This test shows that the exit comparison and the next-iteration IV increment
7; have zero cost if the vector loop gets executed exactly once with the given VF.
8define i64 @test(ptr %a, ptr %b) #0 {
; Returns the sum over i = 0..15 of zext(b[i]) udiv zext(a[i]), with i8 elements
; widened to i64 before the division.
;
; Expected debug output (see the checks below): at VF 8 the IV increment and
; the exit compare are each reported with cost 1. At VF 16, which equals the
; 16-iteration trip count of this loop, the first cost line expected after the
; VF 8 total is the one for the %i.iv phi, i.e. no separate cost line for
; %i.iv.next or %exitcond.not precedes it. VF 16 is expected to be selected.
9; CHECK-LABEL: LV: Checking a loop in 'test'
10; CHECK: Cost of 1 for VF 8: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
11; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
12; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
13; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
14; CHECK: Cost for VF 8: 30
15; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
16; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
17; CHECK: Cost for VF 16: 56
18; CHECK: LV: Selecting VF: 16
19entry:
20  br label %for.body
21
22exit:                                 ; preds = %for.body
23  ret i64 %add
24
; Single-block loop: %i.iv counts up from 0 and %sum carries the running total.
25for.body:                                         ; preds = %entry, %for.body
26  %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
27  %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
28  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv
29  %0 = load i8, ptr %arrayidx, align 1
30  %conv = zext i8 %0 to i64
31  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %i.iv
32  %1 = load i8, ptr %arrayidx2, align 1
33  %conv3 = zext i8 %1 to i64
34  %div = udiv i64 %conv3, %conv
35  %add = add i64 %div, %sum
; %i.iv.next is used only by the %i.iv phi and the exit compare below.
36  %i.iv.next = add nuw nsw i64 %i.iv, 1
; The loop exits once the incremented IV reaches 16.
37  %exitcond.not = icmp eq i64 %i.iv.next, 16
38  br i1 %exitcond.not, label %exit, label %for.body
39}
40
41; Same as above, but the next-iteration IV has extra users, and thus its cost is not zero.
42define i64 @test_external_iv_user(ptr %a, ptr %b) #0 {
; Returns the sum over i = 0..15 of zext(b[i + 1]) udiv zext(a[i]).
; Here %i.iv.next is also the index of the load from %b, so it has a user
; besides the %i.iv phi and the exit compare.
;
; Expected debug output (see the checks below): %i.iv.next is reported with
; cost 1 at both VF 8 and VF 16, the VF 16 total is 57, and the selected VF is
; vscale x 2.
43; CHECK-LABEL: LV: Checking a loop in 'test_external_iv_user'
44; CHECK: Cost of 1 for VF 8: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
45; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
46; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
47; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
48; CHECK: Cost for VF 8: 30
49; CHECK-NEXT: Cost of 1 for VF 16: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
50; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
51; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
52; CHECK: Cost for VF 16: 57
53; CHECK: LV: Selecting VF: vscale x 2
54entry:
55  br label %for.body
56
; Single-block loop: %i.iv counts up from 0 and %sum carries the running total.
57for.body:                                         ; preds = %entry, %for.body
58  %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
59  %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
60  %arrayidx = getelementptr inbounds nuw i8, ptr %a, i64 %i.iv
61  %0 = load i8, ptr %arrayidx, align 1
62  %conv = zext i8 %0 to i64
63  %i.iv.next = add nuw nsw i64 %i.iv, 1
; Extra user of the incremented IV: it indexes %b, i.e. the load reads b[i + 1].
64  %arrayidx2 = getelementptr inbounds nuw i8, ptr %b, i64 %i.iv.next
65  %1 = load i8, ptr %arrayidx2, align 1
66  %conv3 = zext i8 %1 to i64
67  %div = udiv i64 %conv3, %conv
68  %add = add i64 %sum, %div
; The loop exits once the incremented IV reaches 16.
69  %exitcond.not = icmp eq i64 %i.iv.next, 16
70  br i1 %exitcond.not, label %exit, label %for.body
71
72exit:                                 ; preds = %for.body
73  ret i64 %add
74}
75
76; Same as above but with two IVs without extra users. They all have zero cost when VF equals the number of iterations.
77define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
; Returns the sum over 16 iterations of zext(b[j]) * zext(a[i]), where i starts
; at 0 and j starts at %start; both advance by 1 per iteration.
;
; Expected debug output (see the checks below): at VF 8 both IV increments and
; the exit compare are each reported with cost 1. At VF 16 the first cost lines
; expected after the VF 8 total are the two IV phis, i.e. no separate cost line
; for %i.iv.next, %j.iv.next or %exitcond.not precedes them. VF 16 is expected
; to be selected.
78; CHECK-LABEL: LV: Checking a loop in 'test_two_ivs'
79; CHECK: Cost of 1 for VF 8: induction instruction   %i.iv.next = add nuw nsw i64 %i.iv, 1
80; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
81; CHECK-NEXT: Cost of 1 for VF 8: induction instruction   %j.iv.next = add nuw nsw i64 %j.iv, 1
82; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
83; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %i.iv.next, 16
84; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
85; CHECK: Cost for VF 8: 24
86; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
87; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
88; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<{{.+}}> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
89; CHECK: Cost for VF 16: 42
90; CHECK: LV: Selecting VF: 16
91entry:
92  br label %for.body
93
94exit:                                 ; preds = %for.body
95  ret i64 %add
96
; Single-block loop: %i.iv indexes %a, %j.iv indexes %b, and %sum carries the
; running total.
97for.body:                                         ; preds = %entry, %for.body
98  %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
99  %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
100  %sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
101  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv
102  %0 = load i8, ptr %arrayidx, align 1
103  %conv = zext i8 %0 to i64
104  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %j.iv
105  %1 = load i8, ptr %arrayidx2, align 1
106  %conv3 = zext i8 %1 to i64
107  %mul = mul nuw nsw i64 %conv3, %conv
108  %add = add i64 %mul, %sum
; %i.iv.next feeds its phi and the exit compare; %j.iv.next feeds only its phi.
109  %i.iv.next = add nuw nsw i64 %i.iv, 1
110  %j.iv.next = add nuw nsw i64 %j.iv, 1
; Only the first IV controls the exit: the loop ends once %i.iv.next reaches 16.
111  %exitcond.not = icmp eq i64 %i.iv.next, 16
112  br i1 %exitcond.not, label %exit, label %for.body
113}
114
; The exit compare has a user outside the loop: it is the function's return
; value. The loop itself performs dst[i] += src[i] on i8 elements for
; i = 0..15. Unlike the other functions in this file, this one does not use
; attribute group #0.
115define i1 @test_extra_cmp_user(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src) {
; Expected debug output (see the checks below): at VF 8 the IV increment and
; the exit compare are each reported with cost 4. At VF 16 the first cost line
; expected after the VF 8 total is the one for the %indvars.iv phi, the VF 16
; total is 4, and VF 16 is expected to be selected.
116; CHECK-LABEL: LV: Checking a loop in 'test_extra_cmp_user'
117; CHECK: Cost of 4 for VF 8: induction instruction   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
118; CHECK-NEXT: Cost of 0 for VF 8: induction instruction   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
119; CHECK-NEXT: Cost of 4 for VF 8: exit condition instruction   %exitcond.not = icmp eq i64 %indvars.iv.next, 16
120; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
121; CHECK: Cost for VF 8: 12
122; CHECK-NEXT: Cost of 0 for VF 16: induction instruction   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
123; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
124; CHECK: Cost for VF 16: 4
125; CHECK: LV: Selecting VF: 16
126entry:
127  br label %for.body
128
; NOTE(review): the i8 accesses below are marked align 4 although consecutive
; iterations address consecutive bytes, so not every access can be 4-byte
; aligned. Confirm whether this is intentional before changing it, since the
; checked costs may depend on it.
129for.body:
130  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
131  %arrayidx = getelementptr inbounds nuw i8, ptr %src, i64 %indvars.iv
132  %0 = load i8, ptr %arrayidx, align 4
133  %arrayidx2 = getelementptr inbounds nuw i8, ptr %dst, i64 %indvars.iv
134  %1 = load i8, ptr %arrayidx2, align 4
135  %add = add nsw i8 %1, %0
136  store i8 %add, ptr %arrayidx2, align 4
137  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; The compare result drives the back-edge branch and is also returned from %exit.
138  %exitcond.not = icmp eq i64 %indvars.iv.next, 16
139  br i1 %exitcond.not, label %exit, label %for.body
140
141exit:
142  ret i1 %exitcond.not
143}
144
; Attribute group #0, referenced by @test, @test_external_iv_user and
; @test_two_ivs: adds the "+sve" target feature and a vscale_range of (1, 16).
145attributes #0 = { vscale_range(1, 16) "target-features"="+sve" }
146