xref: /llvm-project/llvm/test/Transforms/LoopVectorize/outer_loop_test2.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; int A[1024], B[1024];
2;
3; void foo(int iCount, int c, int jCount)
4; {
5;
6;   int i, j;
7;
8; #pragma clang loop vectorize(enable) vectorize_width(4)
9;   for (i = 0; i < iCount; i++) {
10;     A[i] = c;
11;     for (j = 0; j < jCount; j++) {
12;       A[i] += B[j] + i;
13;     }
14;   }
15; }
16; RUN: opt -S -passes=loop-vectorize -enable-vplan-native-path < %s | FileCheck %s
17; CHECK: %[[ZeroTripChk:.*]] = icmp sgt i32 %jCount, 0
18; CHECK-LABEL: vector.ph:
19; CHECK: %[[CVal0:.*]] = insertelement <4 x i32> poison, i32 %c, i64 0
20; CHECK-NEXT: %[[CSplat:.*]] = shufflevector <4 x i32> %[[CVal0]], <4 x i32> poison, <4 x i32> zeroinitializer
21
22; CHECK-LABEL: vector.body:
23; CHECK: %[[Ind:.*]] = phi i64 [ 0, %vector.ph ], [ %[[IndNext:.*]], %[[ForInc:.*]] ]
24; CHECK: %[[VecInd:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %[[VecIndNext:.*]], %[[ForInc]] ]
25; CHECK: %[[AAddr:.*]] = getelementptr inbounds [1024 x i32], ptr @A, i64 0, <4 x i64> %[[VecInd]]
26; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[CSplat]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true))
27; CHECK: br i1 %[[ZeroTripChk]], label %[[InnerForPh:.*]], label %[[OuterInc:.*]]
28
29; CHECK: [[InnerForPh]]:
30; CHECK: %[[WideAVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
31; CHECK: %[[VecIndTr:.*]] = trunc <4 x i64> %[[VecInd]] to <4 x i32>
32; CHECK: br label %[[InnerForBody:.*]]
33
34; CHECK: [[InnerForBody]]:
35; CHECK: %[[InnerInd:.*]] = phi <4 x i64> [ zeroinitializer, %[[InnerForPh]] ], [ %[[InnerIndNext:.*]], %[[InnerForBody]] ]
36; CHECK: %[[AccumPhi:.*]] = phi <4 x i32> [ %[[WideAVal]], %[[InnerForPh]] ], [ %[[AccumPhiNext:.*]], %[[InnerForBody]] ]
37; CHECK: %[[BAddr:.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, <4 x i64> %[[InnerInd]]
38; CHECK: %[[WideBVal:.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %[[BAddr]], i32 4, <4 x i1> splat (i1 true), <4 x i32> poison)
39; CHECK: %[[Add1:.*]] = add nsw <4 x i32> %[[WideBVal]], %[[VecIndTr]]
40; CHECK: %[[AccumPhiNext]] = add nsw <4 x i32> %[[Add1]], %[[AccumPhi]]
41; CHECK: %[[InnerIndNext]] = add nuw nsw <4 x i64> %[[InnerInd]], splat (i64 1)
42; CHECK: %[[InnerVecCond:.*]] = icmp eq <4 x i64> %[[InnerIndNext]], {{.*}}
43; CHECK: %[[InnerCond:.+]] = extractelement <4 x i1> %[[InnerVecCond]], i32 0
44; CHECK: br i1 %[[InnerCond]], label %[[InnerCrit:.*]], label %[[InnerForBody]]
45
46; CHECK: [[InnerCrit]]:
47; CHECK: %[[StorePhi:.*]] = phi <4 x i32> [ %[[AccumPhiNext]], %[[InnerForBody]] ]
48; CHECK: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %[[StorePhi]], <4 x ptr> %[[AAddr]], i32 4, <4 x i1> splat (i1 true))
49; CHECK:  br label %[[ForInc]]
50
51; CHECK: [[ForInc]]:
52; CHECK: %[[IndNext]] = add nuw i64 %[[Ind]], 4
53; CHECK: %[[VecIndNext]] = add <4 x i64> %[[VecInd]], splat (i64 4)
54; CHECK: %[[Cmp:.*]] = icmp eq i64 %[[IndNext]], {{.*}}
55; CHECK: br i1 %[[Cmp]], label %middle.block, label %vector.body
56
57@A = common global [1024 x i32] zeroinitializer, align 16 ; int A[1024]: stored to and loaded from by @foo, indexed by the outer induction variable
58@B = common global [1024 x i32] zeroinitializer, align 16 ; int B[1024]: only loaded by @foo, indexed by the inner induction variable
59
60; Function Attrs: norecurse nounwind uwtable
; NOTE(review): the define below references no attribute group, so the
; "Function Attrs" line above looks like a leftover from the compiler output
; this test was derived from -- confirm before relying on it.
;
; @foo is the IR form of the C function foo(iCount, c, jCount) quoted in the
; header comment: an outer loop over i stores %c to A[i]; when %jCount > 0 an
; inner loop over j adds B[j] + i into a running value that starts as A[i] and
; is written back to A[i] once the inner loop exits. The outer loop's latch
; branch carries !llvm.loop !1 (vectorize enable, width 4).
61define void @foo(i32 %iCount, i32 %c, i32 %jCount) {
62entry:
63  %cmp22 = icmp sgt i32 %iCount, 0 ; outer-loop guard: nothing to do unless iCount > 0
64  br i1 %cmp22, label %for.body.lr.ph, label %for.end11
65
66for.body.lr.ph:                                   ; preds = %entry
67  %cmp220 = icmp sgt i32 %jCount, 0 ; inner-loop guard, evaluated once before the outer loop starts
68  %wide.trip.count = zext i32 %jCount to i64 ; inner-loop exit bound (compared against %indvars.iv.next)
69  %wide.trip.count27 = zext i32 %iCount to i64 ; outer-loop exit bound (compared against %indvars.iv.next26)
70  br label %for.body
71
72for.body:                                         ; preds = %for.inc9, %for.body.lr.ph
73  %indvars.iv25 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next26, %for.inc9 ] ; outer induction variable i
74  %arrayidx = getelementptr inbounds [1024 x i32], ptr @A, i64 0, i64 %indvars.iv25 ; &A[i]
75  store i32 %c, ptr %arrayidx, align 4 ; A[i] = c
76  br i1 %cmp220, label %for.body3.lr.ph, label %for.inc9 ; bypass the inner loop when jCount <= 0
77
78for.body3.lr.ph:                                  ; preds = %for.body
79  %arrayidx.promoted = load i32, ptr %arrayidx, align 4 ; A[i] is read once here; the inner loop carries it in phi %1
80  %0 = trunc i64 %indvars.iv25 to i32 ; i narrowed to i32 so it can be added to B[j]
81  br label %for.body3
82
83for.body3:                                        ; preds = %for.body3, %for.body3.lr.ph
84  %indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ] ; inner induction variable j
85  %1 = phi i32 [ %arrayidx.promoted, %for.body3.lr.ph ], [ %add8, %for.body3 ] ; running value of A[i]
86  %arrayidx5 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv ; &B[j]
87  %2 = load i32, ptr %arrayidx5, align 4
88  %add = add nsw i32 %2, %0 ; B[j] + i
89  %add8 = add nsw i32 %add, %1 ; running value += B[j] + i
90  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
91  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
92  br i1 %exitcond, label %for.cond1.for.inc9_crit_edge, label %for.body3
93
94for.cond1.for.inc9_crit_edge:                     ; preds = %for.body3
95  store i32 %add8, ptr %arrayidx, align 4 ; write the accumulated value back to A[i] after the inner loop
96  br label %for.inc9
97
98for.inc9:                                         ; preds = %for.cond1.for.inc9_crit_edge, %for.body
99  %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1
100  %exitcond28 = icmp eq i64 %indvars.iv.next26, %wide.trip.count27
101  br i1 %exitcond28, label %for.end11, label %for.body, !llvm.loop !1 ; outer-loop latch; !1 holds the vectorize hints
102
103for.end11:                                        ; preds = %for.inc9, %entry
104  ret void
105}
106
107!1 = distinct !{!1, !2, !3} ; self-referential loop ID attached to the branch in for.inc9
108!2 = !{!"llvm.loop.vectorize.width", i32 4} ; corresponds to vectorize_width(4) in the pragma quoted in the header
109!3 = !{!"llvm.loop.vectorize.enable", i1 true} ; corresponds to vectorize(enable) in the pragma quoted in the header
110