xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll (revision 7d7577256b76e4293f455b8093504d5f7044ab4b)
1; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -S | FileCheck %s
2
; Module-level target description. Layout string fields: "e" = little-endian,
; "m:e" = ELF name mangling, "i64:64" / "i128:128" = ABI alignment (in bits) of
; those integer types, "n32:64" = native integer widths, "S128" = natural stack
; alignment in bits.
3target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
4target triple = "aarch64--linux-gnu" ; AArch64 Linux target
5
6; CHECK-LABEL: @reduction_i8
7;
8; char reduction_i8(char *a, char *b, int n) {
9;   char sum = 0;
10;   for (int i = 0; i < n; ++i)
11;     sum += (a[i] + b[i]);
12;   return sum;
13; }
14;
15; CHECK: vector.body:
16; CHECK:   phi <16 x i8>
17; CHECK:   load <16 x i8>
18; CHECK:   load <16 x i8>
19; CHECK:   add <16 x i8>
20; CHECK:   add <16 x i8>
21;
22; CHECK: middle.block:
23; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8>
24; CHECK:   zext i8 [[Rdx]] to i32
25;
; Scalar input loop for the i8 case. The running sum lives in an i32, but it is
; masked to its low 8 bits on every iteration and truncated to i8 after the
; loop, so only 8 bits of the accumulator are ever observable. The expectations
; that precede this function look for the reduction being carried out on
; <16 x i8> vectors. Returns 0 without entering the loop when %n <= 0.
26define i8 @reduction_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
27entry:
28  %cmp.12 = icmp sgt i32 %n, 0 ; guard: the loop is entered only when n > 0
29  br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup
30
31for.body.preheader: ; single entry edge into the loop
32  br label %for.body
33
34for.cond.for.cond.cleanup_crit_edge: ; loop exit block
35  %add5.lcssa = phi i32 [ %add5, %for.body ] ; final i32 sum leaving the loop
36  %conv6 = trunc i32 %add5.lcssa to i8 ; narrow the result to the i8 return type
37  br label %for.cond.cleanup
38
39for.cond.cleanup:
40  %sum.0.lcssa = phi i8 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ] ; loop result, or 0 if the loop was skipped
41  ret i8 %sum.0.lcssa
42
43for.body:
44  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i64 element index, starts at 0
45  %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] ; i32 running sum, starts at 0
46  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
47  %0 = load i8, ptr %arrayidx, align 1 ; a[i]
48  %conv = zext i8 %0 to i32 ; a[i] zero-extended to i32
49  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
50  %1 = load i8, ptr %arrayidx2, align 1 ; b[i]
51  %conv3 = zext i8 %1 to i32 ; b[i] zero-extended to i32
52  %conv4 = and i32 %sum.013, 255 ; keep only the low 8 bits of the running sum
53  %add = add nuw nsw i32 %conv, %conv4 ; a[i] + masked sum
54  %add5 = add nuw nsw i32 %add, %conv3 ; ... + b[i]; this value feeds the next iteration
55  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
56  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; next index narrowed to i32 for the compare against %n
57  %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once the next index equals n
58  br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
59}
60
61; CHECK-LABEL: @reduction_i16_1
62;
63; short reduction_i16_1(short *a, short *b, int n) {
64;   short sum = 0;
65;   for (int i = 0; i < n; ++i)
66;     sum += (a[i] + b[i]);
67;   return sum;
68; }
69;
70; CHECK: vector.body:
71; CHECK:   phi <8 x i16>
72; CHECK:   load <8 x i16>
73; CHECK:   load <8 x i16>
74; CHECK:   add <8 x i16>
75; CHECK:   add <8 x i16>
76;
77; CHECK: middle.block:
78; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16>
79; CHECK:   zext i16 [[Rdx]] to i32
80;
; Scalar input loop for the i16 case with i16 inputs. The running sum lives in
; an i32, but it is masked to its low 16 bits on every iteration and truncated
; to i16 after the loop. The expectations that precede this function look for
; the reduction being carried out on <8 x i16> vectors. Returns 0 without
; entering the loop when %n <= 0.
81define i16 @reduction_i16_1(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
82entry:
83  %cmp.16 = icmp sgt i32 %n, 0 ; guard: the loop is entered only when n > 0
84  br i1 %cmp.16, label %for.body.preheader, label %for.cond.cleanup
85
86for.body.preheader: ; single entry edge into the loop
87  br label %for.body
88
89for.cond.for.cond.cleanup_crit_edge: ; loop exit block
90  %add5.lcssa = phi i32 [ %add5, %for.body ] ; final i32 sum leaving the loop
91  %conv6 = trunc i32 %add5.lcssa to i16 ; narrow the result to the i16 return type
92  br label %for.cond.cleanup
93
94for.cond.cleanup:
95  %sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ] ; loop result, or 0 if the loop was skipped
96  ret i16 %sum.0.lcssa
97
98for.body:
99  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i64 element index, starts at 0
100  %sum.017 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] ; i32 running sum, starts at 0
101  %arrayidx = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
102  %0 = load i16, ptr %arrayidx, align 2 ; a[i]
103  %conv.14 = zext i16 %0 to i32 ; a[i] zero-extended to i32
104  %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
105  %1 = load i16, ptr %arrayidx2, align 2 ; b[i]
106  %conv3.15 = zext i16 %1 to i32 ; b[i] zero-extended to i32
107  %conv4.13 = and i32 %sum.017, 65535 ; keep only the low 16 bits of the running sum
108  %add = add nuw nsw i32 %conv.14, %conv4.13 ; a[i] + masked sum
109  %add5 = add nuw nsw i32 %add, %conv3.15 ; ... + b[i]; this value feeds the next iteration
110  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
111  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; next index narrowed to i32 for the compare against %n
112  %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once the next index equals n
113  br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
114}
115
116; CHECK-LABEL: @reduction_i16_2
117;
118; short reduction_i16_2(char *a, char *b, int n) {
119;   short sum = 0;
120;   for (int i = 0; i < n; ++i)
121;     sum += (a[i] + b[i]);
122;   return sum;
123; }
124;
125; CHECK: vector.body:
126; CHECK:   phi <16 x i16>
127; CHECK:   [[Ld1:%[a-zA-Z0-9.]+]] = load <16 x i8>
128; CHECK:   zext <16 x i8> [[Ld1]] to <16 x i16>
129; CHECK:   [[Ld2:%[a-zA-Z0-9.]+]] = load <16 x i8>
130; CHECK:   zext <16 x i8> [[Ld2]] to <16 x i16>
131; CHECK:   add <16 x i16>
132; CHECK:   add <16 x i16>
133;
134; CHECK: middle.block:
135; CHECK:   [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16>
136; CHECK:   zext i16 [[Rdx]] to i32
137;
; Scalar input loop for the i16 case with i8 inputs. Elements are loaded as i8
; and zero-extended; the i32 running sum is masked to its low 16 bits on every
; iteration and truncated to i16 after the loop. The expectations that precede
; this function look for <16 x i8> loads widened to <16 x i16> and a <16 x i16>
; reduction. Returns 0 without entering the loop when %n <= 0.
138define i16 @reduction_i16_2(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
139entry:
140  %cmp.14 = icmp sgt i32 %n, 0 ; guard: the loop is entered only when n > 0
141  br i1 %cmp.14, label %for.body.preheader, label %for.cond.cleanup
142
143for.body.preheader: ; single entry edge into the loop
144  br label %for.body
145
146for.cond.for.cond.cleanup_crit_edge: ; loop exit block
147  %add5.lcssa = phi i32 [ %add5, %for.body ] ; final i32 sum leaving the loop
148  %conv6 = trunc i32 %add5.lcssa to i16 ; narrow the result to the i16 return type
149  br label %for.cond.cleanup
150
151for.cond.cleanup:
152  %sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ] ; loop result, or 0 if the loop was skipped
153  ret i16 %sum.0.lcssa
154
155for.body:
156  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i64 element index, starts at 0
157  %sum.015 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ] ; i32 running sum, starts at 0
158  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
159  %0 = load i8, ptr %arrayidx, align 1 ; a[i], an i8 element
160  %conv = zext i8 %0 to i32 ; a[i] zero-extended to i32
161  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
162  %1 = load i8, ptr %arrayidx2, align 1 ; b[i], an i8 element
163  %conv3 = zext i8 %1 to i32 ; b[i] zero-extended to i32
164  %conv4.13 = and i32 %sum.015, 65535 ; keep only the low 16 bits of the running sum
165  %add = add nuw nsw i32 %conv, %conv4.13 ; a[i] + masked sum
166  %add5 = add nuw nsw i32 %add, %conv3 ; ... + b[i]; this value feeds the next iteration
167  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
168  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; next index narrowed to i32 for the compare against %n
169  %exitcond = icmp eq i32 %lftr.wideiv, %n ; leave the loop once the next index equals n
170  br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
171}
172