; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp -passes=loop-vectorize -tail-predication=enabled -S < %s | \
; RUN:   FileCheck %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; Test that ARMTTIImpl::preferPredicateOverEpilogue triggers tail-folding.

; Element-wise integer add: A[i] = B[i] + C[i] for i in [0, N), entered only
; when N > 0 (signed). The expectations below look for a reference to the
; active-lane-mask intrinsic in the vectorizer output for this function.
define dso_local void @f1(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, ptr noalias nocapture readonly %C, i32 %N) {
; CHECK-LABEL: f1(
; CHECK: entry:
; CHECK: @llvm.get.active.lane.mask
; CHECK: }
entry:
  %cmp8 = icmp sgt i32 %N, 0
  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %i.09 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.09
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, ptr %C, i32 %i.09
  %1 = load i32, ptr %arrayidx1, align 4
  %add = add nsw i32 %1, %0
  %arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %i.09
  store i32 %add, ptr %arrayidx2, align 4
  %inc = add nuw nsw i32 %i.09, 1
  %exitcond.not = icmp eq i32 %inc, %N
  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
}

; Fast-math float sum over N elements of Input, walking a pointer forward and
; counting %blkCnt down from N to 0; the sum divided by N (converted to float)
; is stored to Output. The expectations below look for a masked load inside
; vector.body and a branch back to vector.body.
define dso_local void @f32_reduction(ptr nocapture readonly %Input, i32 %N, ptr nocapture %Output) {
; CHECK-LABEL: f32_reduction(
; CHECK: vector.body:
; CHECK: @llvm.masked.load
; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp6 = icmp eq i32 %N, 0
  br i1 %cmp6, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
  %sum.08 = phi float [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
  %Input.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ]
  %incdec.ptr = getelementptr inbounds float, ptr %Input.addr.07, i32 1
  %0 = load float, ptr %Input.addr.07, align 4
  %add = fadd fast float %0, %sum.08
  %dec = add i32 %blkCnt.09, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  %add.lcssa = phi float [ %add, %while.body ]
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
  %conv = uitofp i32 %N to float
  %div = fdiv fast float %sum.0.lcssa, %conv
  store float %div, ptr %Output, align 4
  ret void
}

; Same shape as @f32_reduction, but the element type is half (2-byte aligned
; loads and store). The same masked-load and back-branch expectations apply.
define dso_local void @f16_reduction(ptr nocapture readonly %Input, i32 %N, ptr nocapture %Output) {
; CHECK-LABEL: f16_reduction(
; CHECK: vector.body:
; CHECK: @llvm.masked.load
; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp6 = icmp eq i32 %N, 0
  br i1 %cmp6, label %while.end, label %while.body.preheader

while.body.preheader:                             ; preds = %entry
  br label %while.body

while.body:                                       ; preds = %while.body.preheader, %while.body
  %blkCnt.09 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
  %sum.08 = phi half [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
  %Input.addr.07 = phi ptr [ %incdec.ptr, %while.body ], [ %Input, %while.body.preheader ]
  %incdec.ptr = getelementptr inbounds half, ptr %Input.addr.07, i32 1
  %0 = load half, ptr %Input.addr.07, align 2
  %add = fadd fast half %0, %sum.08
  %dec = add i32 %blkCnt.09, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end.loopexit, label %while.body

while.end.loopexit:                               ; preds = %while.body
  %add.lcssa = phi half [ %add, %while.body ]
  br label %while.end

while.end:                                        ; preds = %while.end.loopexit, %entry
  %sum.0.lcssa = phi half [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
  %conv = uitofp i32 %N to half
  %div = fdiv fast half %sum.0.lcssa, %conv
  store half %div, ptr %Output, align 2
  ret void
}

; Two reductions in one loop: a fast-math float sum over fInput and an i32 sum
; over iInput, both over N elements. After the loop the float sum divided by N
; is stored to fOutput, and the integer sum (converted to float, divided by N,
; converted back to i32) is stored to iOutput.
define dso_local void @mixed_f32_i32_reduction(ptr nocapture readonly %fInput, ptr nocapture readonly %iInput, i32 %N, ptr nocapture %fOutput, ptr nocapture %iOutput) {
; CHECK-LABEL: mixed_f32_i32_reduction(
; CHECK: vector.body:
; CHECK: @llvm.masked.load
; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp15 = icmp eq i32 %N, 0
  br i1 %cmp15, label %while.end, label %while.body.preheader

while.body.preheader:
  br label %while.body

while.body:
  %blkCnt.020 = phi i32 [ %dec, %while.body ], [ %N, %while.body.preheader ]
  %isum.019 = phi i32 [ %add2, %while.body ], [ 0, %while.body.preheader ]
  %fsum.018 = phi float [ %add, %while.body ], [ 0.000000e+00, %while.body.preheader ]
  %fInput.addr.017 = phi ptr [ %incdec.ptr, %while.body ], [ %fInput, %while.body.preheader ]
  %iInput.addr.016 = phi ptr [ %incdec.ptr1, %while.body ], [ %iInput, %while.body.preheader ]
  %incdec.ptr = getelementptr inbounds float, ptr %fInput.addr.017, i32 1
  %incdec.ptr1 = getelementptr inbounds i32, ptr %iInput.addr.016, i32 1
  %0 = load i32, ptr %iInput.addr.016, align 4
  %add2 = add nsw i32 %0, %isum.019
  %1 = load float, ptr %fInput.addr.017, align 4
  %add = fadd fast float %1, %fsum.018
  %dec = add i32 %blkCnt.020, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end.loopexit, label %while.body

while.end.loopexit:
  %add.lcssa = phi float [ %add, %while.body ]
  %add2.lcssa = phi i32 [ %add2, %while.body ]
  %phitmp = sitofp i32 %add2.lcssa to float
  br label %while.end

while.end:
  %fsum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add.lcssa, %while.end.loopexit ]
  %isum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %phitmp, %while.end.loopexit ]
  %conv = uitofp i32 %N to float
  %div = fdiv fast float %fsum.0.lcssa, %conv
  store float %div, ptr %fOutput, align 4
  %div5 = fdiv fast float %isum.0.lcssa, %conv
  %conv6 = fptosi float %div5 to i32
  store i32 %conv6, ptr %iOutput, align 4
  ret void
}

; Integer multiply reduction: returns the product of B[0..N) with start value
; 1, or 1 when N <= 0. The expectations below look for a masked load inside
; vector.body and a branch back to vector.body.
define dso_local i32 @i32_mul_reduction(ptr noalias nocapture readonly %B, i32 %N) {
; CHECK-LABEL: i32_mul_reduction(
; CHECK: vector.body:
; CHECK: @llvm.masked.load
; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp6 = icmp sgt i32 %N, 0
  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  %mul.lcssa = phi i32 [ %mul, %for.body ]
  br label %for.cond.cleanup

for.cond.cleanup:
  %S.0.lcssa = phi i32 [ 1, %entry ], [ %mul.lcssa, %for.cond.cleanup.loopexit ]
  ret i32 %S.0.lcssa

for.body:
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %S.07 = phi i32 [ %mul, %for.body ], [ 1, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08
  %0 = load i32, ptr %arrayidx, align 4
  %mul = mul nsw i32 %0, %S.07
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; Integer OR reduction: returns the bitwise OR of B[0..N) with start value 1,
; or 1 when N <= 0. The same masked-load and back-branch expectations apply.
define dso_local i32 @i32_or_reduction(ptr noalias nocapture readonly %B, i32 %N) {
; CHECK-LABEL: i32_or_reduction(
; CHECK: vector.body:
; CHECK: @llvm.masked.load
; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp6 = icmp sgt i32 %N, 0
  br i1 %cmp6, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %or.lcssa = phi i32 [ %or, %for.body ]
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  %S.0.lcssa = phi i32 [ 1, %entry ], [ %or.lcssa, %for.cond.cleanup.loopexit ]
  ret i32 %S.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %S.07 = phi i32 [ %or, %for.body ], [ 1, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %B, i32 %i.08
  %0 = load i32, ptr %arrayidx, align 4
  %or = or i32 %0, %S.07
  %inc = add nuw nsw i32 %i.08, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; Integer AND reduction: returns the bitwise AND of A[0..N) with start value
; %S, or %S unchanged when N <= 0. The same masked-load and back-branch
; expectations apply.
define dso_local i32 @i32_and_reduction(ptr noalias nocapture readonly %A, i32 %N, i32 %S) {
; CHECK-LABEL: i32_and_reduction(
; CHECK: vector.body:
; CHECK: @llvm.masked.load
; CHECK: br i1 %{{.*}}, label {{.*}}, label %vector.body
entry:
  %cmp5 = icmp sgt i32 %N, 0
  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  %and.lcssa = phi i32 [ %and, %for.body ]
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  %S.addr.0.lcssa = phi i32 [ %S, %entry ], [ %and.lcssa, %for.cond.cleanup.loopexit ]
  ret i32 %S.addr.0.lcssa

for.body:                                         ; preds = %for.body.preheader, %for.body
  %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
  %S.addr.06 = phi i32 [ %and, %for.body ], [ %S, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.07
  %0 = load i32, ptr %arrayidx, align 4
  %and = and i32 %0, %S.addr.06
  %inc = add nuw nsw i32 %i.07, 1
  %exitcond = icmp eq i32 %inc, %N
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}