; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp -passes=loop-vectorize -tail-predication=enabled -S < %s | \
; RUN:  FileCheck %s

; Origin: llvm/test/Transforms/LoopVectorize/ARM/tail-folding-allowed.ll
;         (revision 2fab927546b34f5af7770541a9bbb974d9818c5c)
;
; The command above runs only the loop vectorizer, targeting Armv8.1-M
; Mainline with the MVE floating-point extension (+mve.fp) and with
; -tail-predication=enabled, then hands the textual IR to FileCheck.

; 32-bit little-endian layout: 32-bit pointers, 32-bit native integer width.
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; Test that ARMTTIImpl::preferPredicateOverEpilogue triggers tail-folding.
; Each function below is a scalar loop; its FileCheck directives look for a
; lane-mask or masked-load intrinsic in the vectorized output.

; f1: element-wise i32 add, A[i] = B[i] + C[i] for i in [0, N).
;   %A       - destination array (noalias)
;   %B, %C   - read-only source arrays (noalias)
;   %N       - signed trip count; the loop is skipped when N <= 0
; The directives below require a call to @llvm.get.active.lane.mask somewhere
; between the `entry:` block and the closing brace of the output function.
define dso_local void @f1(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) {
; CHECK-LABEL: f1(
; CHECK:       entry:
; CHECK:       @llvm.get.active.lane.mask
; CHECK:       }
entry:
  ; Guard: only enter the loop for a positive element count.
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %i.09 is the induction variable, counting 0, 1, ..., N-1.
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
  %1 = load i32, ptr %arrayidx1, align 4
  %add = add nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
  store i32 %add, ptr %arrayidx2, align 4
  %inc = add nuw nsw i32 %i.09, 1
  ; Exit once the incremented index reaches N.
  %exitcond.not = icmp eq i32 %inc, %N
  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
}

; f32_reduction: fast-math float sum of N elements, stored as sum / N.
;   %Input   - read-only float array, walked with a bumped pointer
;   %N       - element count, also the down-counter start; loop skipped if 0
;   %Output  - receives the sum divided by N (N converted with uitofp)
; The directives below require a `vector.body` block containing a call to
; @llvm.masked.load and a conditional branch whose second target is
; %vector.body.
define dso_local void @f32_reduction(ptr nocapture readonly %Input, i32 %N, ptr nocapture %Output) {
; CHECK-LABEL: f32_reduction(
; CHECK:       vector.body:
; CHECK:       @llvm.masked.load
; CHECK:       br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp6 = icmp eq i32 %N, 0
  br i1 %cmp6, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  ; %blkCnt.09 counts down from N; %sum.08 is the running sum (starts at 0.0).
  %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
  %sum.08 = phi float [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
  %Input.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ]
  ; Advance the pointer by one float for the next iteration.
  %incdec.ptr = getelementptr inbounds float, ptr %Input.addr.07, i32 1
  %0 = load float, ptr %Input.addr.07, align 4
  %add = fadd fast float %0, %sum.08
  %dec = add i32 %blkCnt.09, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  %add.lcssa = phi float [ %add, %while.body ]
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  ; The sum is 0.0 when the loop was bypassed.
  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
  %conv = uitofp i32 %N to float
  %div = fdiv fast float %sum.0.lcssa, %conv
  store float %div, ptr %Output, align 4
  ret void
}

; f16_reduction: fast-math half-precision sum of N elements, stored as sum / N.
; Same loop shape as @f32_reduction, but on `half` with 2-byte alignment.
;   %Input   - read-only half array, walked with a bumped pointer
;   %N       - element count, also the down-counter start; loop skipped if 0
;   %Output  - receives the sum divided by N (N converted with uitofp)
; The directives below require a `vector.body` block containing a call to
; @llvm.masked.load and a conditional branch whose second target is
; %vector.body.
define dso_local void @f16_reduction(ptr nocapture readonly %Input, i32 %N, ptr nocapture %Output) {
; CHECK-LABEL: f16_reduction(
; CHECK:       vector.body:
; CHECK:       @llvm.masked.load
; CHECK:       br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp6 = icmp eq i32 %N, 0
  br i1 %cmp6, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  ; %blkCnt.09 counts down from N; %sum.08 is the running sum (starts at 0.0).
  %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
  %sum.08 = phi half [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
  %Input.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ]
  ; Advance the pointer by one half for the next iteration.
  %incdec.ptr = getelementptr inbounds half, ptr %Input.addr.07, i32 1
  %0 = load half, ptr %Input.addr.07, align 2
  %add = fadd fast half %0, %sum.08
  %dec = add i32 %blkCnt.09, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  %add.lcssa = phi half [ %add, %while.body ]
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  ; The sum is 0.0 when the loop was bypassed.
  %sum.0.lcssa = phi half [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
  %conv = uitofp i32 %N to half
  %div = fdiv fast half %sum.0.lcssa, %conv
  store half %div, ptr %Output, align 2
  ret void
}

; mixed_f32_i32_reduction: one loop carrying two reductions of different
; types - a fast-math float sum over %fInput and an i32 sum over %iInput.
;   %fInput, %iInput   - read-only arrays, each walked with a bumped pointer
;   %N                 - element count and down-counter start; loop skipped if 0
;   %fOutput           - receives the float sum divided by N
;   %iOutput           - receives the i32 sum, converted to float, divided by N
;                        and converted back with fptosi
; The directives below require a `vector.body` block containing a call to
; @llvm.masked.load and a conditional branch whose second target is
; %vector.body.
define dso_local void @mixed_f32_i32_reduction(ptr nocapture readonly %fInput, ptr nocapture readonly %iInput, i32 %N, ptr nocapture %fOutput, ptr nocapture %iOutput) {
; CHECK-LABEL: mixed_f32_i32_reduction(
; CHECK:       vector.body:
; CHECK:       @llvm.masked.load
; CHECK:       br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp15 = icmp eq i32 %N, 0
  br i1 %cmp15, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  ; Down-counter, the two accumulators, and the two walking pointers.
  %blkCnt.020 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
  %isum.019 = phi i32 [ %add2, %while.body ], [ 0, %while.body.preheader ]
  %fsum.018 = phi float [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
  %fInput.addr.017 = phi ptr [ %incdec.ptr, %while.body ], [ %fInput, %while.body.preheader ]
  %iInput.addr.016 = phi ptr [ %incdec.ptr1, %while.body ], [ %iInput, %while.body.preheader ]
  %incdec.ptr = getelementptr inbounds float, ptr %fInput.addr.017, i32 1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %iInput.addr.016, i32 1
  %0 = load i32, ptr %iInput.addr.016, align 4
  %add2 = add nsw i32 %0, %isum.019
  %1 = load float, ptr %fInput.addr.017, align 4
  %add = fadd fast float %1, %fsum.018
  %dec = add i32 %blkCnt.020, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  %add.lcssa = phi float [ %add, %while.body ]
  %add2.lcssa = phi i32 [ %add2, %while.body ]
  ; The integer sum is converted to float here, outside the loop.
  %phitmp = sitofp i32 %add2.lcssa to float
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  ; Both sums are 0.0 when the loop was bypassed.
  %fsum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
  %isum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %phitmp, %while.end.loopexit ]
  %conv = uitofp i32 %N to float
  %div = fdiv fast float %fsum.0.lcssa, %conv
  store float %div, ptr %fOutput, align 4
  %div5 = fdiv fast float %isum.0.lcssa, %conv
  %conv6 = fptosi float %div5 to i32
  store i32 %conv6, ptr %iOutput, align 4
  ret void
}

; i32_mul_reduction: returns the product B[0] * B[1] * ... * B[N-1], with the
; accumulator starting at 1.
;   %B   - read-only i32 array (noalias)
;   %N   - signed trip count; when N <= 0 the loop is skipped and 1 is returned
; The directives below require a `vector.body` block containing a call to
; @llvm.masked.load and a conditional branch whose second target is
; %vector.body.
define dso_local i32 @i32_mul_reduction(ptr noalias nocapture readonly %B, i32 %N) {
; CHECK-LABEL: i32_mul_reduction(
; CHECK:       vector.body:
; CHECK:       @llvm.masked.load
; CHECK:       br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp6 = icmp sgt i32 %N, 0
  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %mul.lcssa = phi i32 [ %mul, %for.body ]
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ; Result is the start value 1 when the loop was bypassed.
  %S.0.lcssa = phi i32 [ 1, %entry ], [ %mul.lcssa, %for.cond.cleanup.loopexit ]
  ret i32 %S.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %i.08 is the index (0..N-1); %S.07 is the running product.
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %S.07 = phi i32 [ %mul, %for.body ], [ 1, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08
  %0 = load i32, ptr %arrayidx, align 4
  %mul = mul nsw i32 %0, %S.07
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; i32_or_reduction: returns 1 | B[0] | B[1] | ... | B[N-1]; the accumulator
; starts at 1.
;   %B   - read-only i32 array (noalias)
;   %N   - signed trip count; when N <= 0 the loop is skipped and 1 is returned
; The directives below require a `vector.body` block containing a call to
; @llvm.masked.load and a conditional branch whose second target is
; %vector.body.
define dso_local i32 @i32_or_reduction(ptr noalias nocapture readonly %B, i32 %N) {
; CHECK-LABEL: i32_or_reduction(
; CHECK:       vector.body:
; CHECK:       @llvm.masked.load
; CHECK:       br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp6 = icmp sgt i32 %N, 0
  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %or.lcssa = phi i32 [ %or, %for.body ]
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ; Result is the start value 1 when the loop was bypassed.
  %S.0.lcssa = phi i32 [ 1, %entry ], [ %or.lcssa, %for.cond.cleanup.loopexit ]
  ret i32 %S.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %i.08 is the index (0..N-1); %S.07 is the running OR value.
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %S.07 = phi i32 [ %or, %for.body ], [ 1, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08
  %0 = load i32, ptr %arrayidx, align 4
  %or = or i32 %0, %S.07
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; i32_and_reduction: returns S & A[0] & A[1] & ... & A[N-1]; the accumulator
; starts at the caller-supplied %S.
;   %A   - read-only i32 array (noalias)
;   %N   - signed trip count; when N <= 0 the loop is skipped and %S is returned
;   %S   - initial accumulator value
; The directives below require a `vector.body` block containing a call to
; @llvm.masked.load and a conditional branch whose second target is
; %vector.body.
define dso_local i32 @i32_and_reduction(ptr noalias nocapture readonly %A, i32 %N, i32 %S) {
; CHECK-LABEL: i32_and_reduction(
; CHECK:       vector.body:
; CHECK:       @llvm.masked.load
; CHECK:       br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp5 = icmp sgt i32 %N, 0
  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %and.lcssa = phi i32 [ %and, %for.body ]
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ; Result is the unmodified %S when the loop was bypassed.
  %S.addr.0.lcssa = phi i32 [ %S, %entry ], [ %and.lcssa, %for.cond.cleanup.loopexit ]
  ret i32 %S.addr.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  ; %i.07 is the index (0..N-1); %S.addr.06 is the running AND value.
  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %S.addr.06 = phi i32 [ %and, %for.body ], [ %S, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.07
  %0 = load i32, ptr %arrayidx, align 4
  %and = and i32 %0, %S.addr.06
  %inc = add nuw nsw i32 %i.07, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}