; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -S | FileCheck %s

; TODO: For now, only the `-epilogue-vectorization-minimum-VF` option is
; tested. In the future we need to replace this with a more meaningful test of
; the epilogue vectorization cost-model.
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-minimum-VF=4 -S | FileCheck %s --check-prefix=CHECK-MIN-4
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -force-vector-interleave=1 -S | FileCheck %s --check-prefix=CHECK-MIN-IC-1

; Module-level target configuration: little-endian 64-bit PowerPC Linux.
; NOTE(review): the vectorization decisions checked in this file presumably
; depend on this target's cost model - confirm before changing the triple.
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

; Do not vectorize epilogues for loops with minsize attribute
; (@f1 uses attribute group #0, which this file defines as { minsize }).
; None of the epilogue-vectorization block names may appear between the
; function label and its first `ret void`.
; CHECK-LABEL: @f1
; CHECK-NOT: vector.main.loop.iter.check
; CHECK-NOT: vec.epilog.iter.check
; CHECK-NOT: vec.epilog.ph
; CHECK-NOT: vec.epilog.vector.body
; CHECK-NOT: vec.epilog.middle.block
; CHECK: ret void

; Element-wise float add: aa[i] = bb[i] + cc[i] for i in [0, N).
; All three pointers are noalias; the loop is skipped when N <= 0.
define dso_local void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) #0 {
entry:
  ; Guard: only enter the loop for a strictly positive (signed) N.
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; N > 0 on this path, so zero-extending it gives the 64-bit trip count.
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Induction variable counts 0, 1, ..., N-1.
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %bb, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %cc, i64 %indvars.iv
  %1 = load float, ptr %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, ptr %aa, i64 %indvars.iv
  store float %add, ptr %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Keep looping until the incremented index reaches the trip count.
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Do not vectorize epilogues for loops with optsize attribute
; (@f2 uses attribute group #1, which this file defines as { optsize }).
; None of the epilogue-vectorization block names may appear between the
; function label and its first `ret void`.
; CHECK-LABEL: @f2
; CHECK-NOT: vector.main.loop.iter.check
; CHECK-NOT: vec.epilog.iter.check
; CHECK-NOT: vec.epilog.ph
; CHECK-NOT: vec.epilog.vector.body
; CHECK-NOT: vec.epilog.middle.block
; CHECK: ret void

; Same loop body as @f1 (aa[i] = bb[i] + cc[i] over floats for i in [0, N));
; only the attribute group on the function differs.
define dso_local void @f2(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) #1 {
entry:
  ; Guard: only enter the loop for a strictly positive (signed) N.
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; N > 0 on this path, so zero-extending it gives the 64-bit trip count.
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Induction variable counts 0, 1, ..., N-1.
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %bb, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %cc, i64 %indvars.iv
  %1 = load float, ptr %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, ptr %aa, i64 %indvars.iv
  store float %add, ptr %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Keep looping until the incremented index reaches the trip count.
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; @f3 is checked under all three invocations at the top of this file; each
; group of directives below belongs to one of them.

; Do not vectorize the epilogue for loops with VF*IC less than the default -epilogue-vectorization-minimum-VF of 16.
; (Used by the invocation that passes -force-vector-interleave=1.)
; CHECK-MIN-IC-1-LABEL: @f3
; CHECK-MIN-IC-1-NOT: vector.main.loop.iter.check
; CHECK-MIN-IC-1-NOT: vec.epilog.iter.check
; CHECK-MIN-IC-1-NOT: vec.epilog.ph
; CHECK-MIN-IC-1-NOT: vec.epilog.vector.body
; CHECK-MIN-IC-1-NOT: vec.epilog.middle.block
; CHECK-MIN-IC-1: ret void

; Specify a smaller minimum VF (via `-epilogue-vectorization-minimum-VF=4`) and
; make sure the epilogue gets vectorized in that case.
; CHECK-MIN-4-LABEL: @f3
; CHECK-MIN-4: vector.main.loop.iter.check
; CHECK-MIN-4: vec.epilog.iter.check
; CHECK-MIN-4: vec.epilog.ph
; CHECK-MIN-4: vec.epilog.vector.body
; CHECK-MIN-4: vec.epilog.middle.block
; CHECK-MIN-4: ret void

; Default behaviour is to vectorize the epilogue for this loop.
; CHECK-LABEL: @f3
; CHECK: vector.main.loop.iter.check
; CHECK: vec.epilog.iter.check
; CHECK: vec.epilog.ph
; CHECK: vec.epilog.vector.body
; CHECK: vec.epilog.middle.block
; CHECK: ret void

; Same loop as @f1 and @f2 (aa[i] = bb[i] + cc[i] over floats for i in [0, N)),
; but with no attribute group attached to the function.
define dso_local void @f3(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) {
entry:
  ; Guard: only enter the loop for a strictly positive (signed) N.
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; N > 0 on this path, so zero-extending it gives the 64-bit trip count.
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Induction variable counts 0, 1, ..., N-1.
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %bb, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %cc, i64 %indvars.iv
  %1 = load float, ptr %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, ptr %aa, i64 %indvars.iv
  store float %add, ptr %arrayidx4, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Keep looping until the incremented index reaches the trip count.
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Attribute groups: #0 is attached to @f1 and #1 to @f2; @f3 uses neither.
attributes #0 = { minsize }
attributes #1 = { optsize }