; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -S | FileCheck %s

; TODO: For now this only exercises the `-epilogue-vectorization-minimum-VF`
; option. It should eventually be replaced by a more meaningful test of the
; epilogue vectorization cost-model.
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-minimum-VF=4 -S | FileCheck %s --check-prefix=CHECK-MIN-4
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -force-vector-interleave=1 -S | FileCheck %s --check-prefix=CHECK-MIN-IC-1

target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

; A loop in a minsize function must not get a vectorized epilogue: none of the
; epilogue-vectorization blocks may show up before the function returns.
; CHECK-LABEL: @f1
; CHECK-NOT: vector.main.loop.iter.check
; CHECK-NOT: vec.epilog.iter.check
; CHECK-NOT: vec.epilog.ph
; CHECK-NOT: vec.epilog.vector.body
; CHECK-NOT: vec.epilog.middle.block
; CHECK: ret void

; Scalar loop computing aa[i] = bb[i] + cc[i] (fast-math float add) for
; i in [0, N). The loop is skipped entirely unless N > 0 (signed compare).
define dso_local void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) #0 {
entry:
  %has.iters = icmp sgt i32 %N, 0
  br i1 %has.iters, label %loop.ph, label %exit

loop.ph:
  ; N is known positive here, so zero-extension yields the trip count.
  %trip.count = zext i32 %N to i64
  br label %loop

loop:
  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
  %bb.addr = getelementptr inbounds float, ptr %bb, i64 %iv
  %bb.val = load float, ptr %bb.addr, align 4
  %cc.addr = getelementptr inbounds float, ptr %cc, i64 %iv
  %cc.val = load float, ptr %cc.addr, align 4
  %sum = fadd fast float %bb.val, %cc.val
  %aa.addr = getelementptr inbounds float, ptr %aa, i64 %iv
  store float %sum, ptr %aa.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %not.done = icmp ne i64 %iv.next, %trip.count
  br i1 %not.done, label %loop, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; Same expectation for an optsize function: no epilogue-vectorization blocks
; may be emitted.
; CHECK-LABEL: @f2
; CHECK-NOT: vector.main.loop.iter.check
; CHECK-NOT: vec.epilog.iter.check
; CHECK-NOT: vec.epilog.ph
; CHECK-NOT: vec.epilog.vector.body
; CHECK-NOT: vec.epilog.middle.block
; CHECK: ret void

; Same loop body as @f1 (aa[i] = bb[i] + cc[i] for i in [0, N)); only the
; attribute group differs.
define dso_local void @f2(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) #1 {
entry:
  %has.iters = icmp sgt i32 %N, 0
  br i1 %has.iters, label %loop.ph, label %exit

loop.ph:
  %trip.count = zext i32 %N to i64
  br label %loop

loop:
  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
  %bb.addr = getelementptr inbounds float, ptr %bb, i64 %iv
  %bb.val = load float, ptr %bb.addr, align 4
  %cc.addr = getelementptr inbounds float, ptr %cc, i64 %iv
  %cc.val = load float, ptr %cc.addr, align 4
  %sum = fadd fast float %bb.val, %cc.val
  %aa.addr = getelementptr inbounds float, ptr %aa, i64 %iv
  store float %sum, ptr %aa.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %not.done = icmp ne i64 %iv.next, %trip.count
  br i1 %not.done, label %loop, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; Do not vectorize the epilogue for loops with VF*IC less than the default
; -epilogue-vectorization-minimum-VF of 16.
; CHECK-MIN-IC-1-LABEL: @f3
; CHECK-MIN-IC-1-NOT: vector.main.loop.iter.check
; CHECK-MIN-IC-1-NOT: vec.epilog.iter.check
; CHECK-MIN-IC-1-NOT: vec.epilog.ph
; CHECK-MIN-IC-1-NOT: vec.epilog.vector.body
; CHECK-MIN-IC-1-NOT: vec.epilog.middle.block
; CHECK-MIN-IC-1: ret void

; When a smaller minimum VF is requested on the command line
; (`-epilogue-vectorization-minimum-VF=4`), the epilogue is expected to be
; vectorized.
; CHECK-MIN-4-LABEL: @f3
; CHECK-MIN-4: vector.main.loop.iter.check
; CHECK-MIN-4: vec.epilog.iter.check
; CHECK-MIN-4: vec.epilog.ph
; CHECK-MIN-4: vec.epilog.vector.body
; CHECK-MIN-4: vec.epilog.middle.block
; CHECK-MIN-4: ret void

; With no extra options the epilogue of this loop is expected to be vectorized
; as well.
; CHECK-LABEL: @f3
; CHECK: vector.main.loop.iter.check
; CHECK: vec.epilog.iter.check
; CHECK: vec.epilog.ph
; CHECK: vec.epilog.vector.body
; CHECK: vec.epilog.middle.block
; CHECK: ret void

; Scalar loop computing aa[i] = bb[i] + cc[i] (fast-math float add) for
; i in [0, N), guarded by a signed N > 0 test. No attribute group is attached
; to this function.
define dso_local void @f3(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) {
entry:
  %has.iters = icmp sgt i32 %N, 0
  br i1 %has.iters, label %loop.ph, label %exit

loop.ph:
  ; N is known positive here, so zero-extension yields the trip count.
  %trip.count = zext i32 %N to i64
  br label %loop

loop:
  %iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
  %bb.addr = getelementptr inbounds float, ptr %bb, i64 %iv
  %bb.val = load float, ptr %bb.addr, align 4
  %cc.addr = getelementptr inbounds float, ptr %cc, i64 %iv
  %cc.val = load float, ptr %cc.addr, align 4
  %sum = fadd fast float %bb.val, %cc.val
  %aa.addr = getelementptr inbounds float, ptr %aa, i64 %iv
  store float %sum, ptr %aa.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %not.done = icmp ne i64 %iv.next, %trip.count
  br i1 %not.done, label %loop, label %loop.exit

loop.exit:
  br label %exit

exit:
  ret void
}

; Attribute groups: #0 is used by @f1, #1 by @f2.
attributes #0 = { minsize }
attributes #1 = { optsize }