; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck --check-prefixes=CHECK,PREDICATED %s
; RUN: opt < %s -passes=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -prefer-predicate-over-epilogue=scalar-epilogue -S | FileCheck --check-prefixes=CHECK,SCALAR %s

; Each function below holds the same byte-wise loop (c[i] = a[i] + b[i]); the
; only difference between them is the branch-weight profile metadata attached
; to the loop's latch branch, which acts as a PGO trip-count hint.
;
; The loop leaves when the pre-increment index compares equal to %bound.

; Without any trip-count hint the loop is expected to be vectorized in both
; the predicated and the scalar-epilogue run.
define i32 @foo_no_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
; CHECK-LABEL: @foo_no_trip_count(
; PREDICATED: vector.body
; SCALAR: vector.body
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
  %a.gep = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %iv
  %a.val = load i8, ptr %a.gep, align 1
  %b.gep = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %iv
  %b.val = load i8, ptr %b.gep, align 1
  %sum = add i8 %a.val, %b.val
  %c.gep = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %iv
  store i8 %sum, ptr %c.gep, align 1
  %iv.next = add nsw i32 %iv, 1
  %done = icmp eq i32 %iv, %bound
  br i1 %done, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret i32 0
}

; With a hinted trip count of 4 (profile !0) the loop is expected to stay
; scalar in both runs.
define i32 @foo_low_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
; CHECK-LABEL: @foo_low_trip_count(
; PREDICATED-NOT: vector.body
; SCALAR-NOT: vector.body
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
  %a.gep = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %iv
  %a.val = load i8, ptr %a.gep, align 1
  %b.gep = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %iv
  %b.val = load i8, ptr %b.gep, align 1
  %sum = add i8 %a.val, %b.val
  %c.gep = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %iv
  store i8 %sum, ptr %c.gep, align 1
  %iv.next = add nsw i32 %iv, 1
  %done = icmp eq i32 %iv, %bound
  br i1 %done, label %for.end, label %for.body, !prof !0

for.end:                                          ; preds = %for.body
  ret i32 0
}

; With a hinted trip count of 10 (profile !1) the loop is expected to be
; vectorized only when predicated tail folding is selected.
define i32 @foo_mid_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
; CHECK-LABEL: @foo_mid_trip_count(
; PREDICATED: vector.body
; SCALAR-NOT: vector.body
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
  %a.gep = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %iv
  %a.val = load i8, ptr %a.gep, align 1
  %b.gep = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %iv
  %b.val = load i8, ptr %b.gep, align 1
  %sum = add i8 %a.val, %b.val
  %c.gep = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %iv
  store i8 %sum, ptr %c.gep, align 1
  %iv.next = add nsw i32 %iv, 1
  %done = icmp eq i32 %iv, %bound
  br i1 %done, label %for.end, label %for.body, !prof !1

for.end:                                          ; preds = %for.body
  ret i32 0
}

; With a hinted trip count of 40 (profile !2) the loop is expected to be
; vectorized in both runs.
define i32 @foo_high_trip_count(ptr %a, ptr %b, ptr %c, i32 %bound) {
; CHECK-LABEL: @foo_high_trip_count(
; PREDICATED: vector.body
; SCALAR: vector.body
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
  %a.gep = getelementptr inbounds [32 x i8], ptr %a, i32 0, i32 %iv
  %a.val = load i8, ptr %a.gep, align 1
  %b.gep = getelementptr inbounds [32 x i8], ptr %b, i32 0, i32 %iv
  %b.val = load i8, ptr %b.gep, align 1
  %sum = add i8 %a.val, %b.val
  %c.gep = getelementptr inbounds [32 x i8], ptr %c, i32 0, i32 %iv
  store i8 %sum, ptr %c.gep, align 1
  %iv.next = add nsw i32 %iv, 1
  %done = icmp eq i32 %iv, %bound
  br i1 %done, label %for.end, label %for.body, !prof !2

for.end:                                          ; preds = %for.body
  ret i32 0
}

; Latch-branch profiles. The first weight belongs to the exit edge (%for.end),
; the second to the back edge (%for.body).
!0 = !{!"branch_weights", i32 10, i32 30}
!1 = !{!"branch_weights", i32 10, i32 90}
!2 = !{!"branch_weights", i32 10, i32 390}