; Prints the initial VPlan that the loop vectorizer builds for a RISC-V target
; and verifies its recipes, once with explicit-vector-length (EVL) tail folding
; forced and once with tail folding disabled.
; REQUIRES: asserts

; Invocation 1 -- tail folding style forced to data-with-evl.
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefixes=IF-EVL,CHECK %s

; Invocation 2 -- tail folding style forced to none.
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefixes=NO-VP,CHECK %s

; Element-wise a[i] = c[i] + b[i] over i32 arrays for i in [0, N).
; All three pointers are noalias. With EVL the plan is expected to carry an
; EVL-based IV phi and vp.load / vp.add / vp.store recipes; without it, plain
; widened load / add / store recipes driven by the canonical induction.
define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
; IF-EVL-EMPTY:
; IF-EVL: vector.ph:
; IF-EVL-NEXT: Successor(s): vector loop
; IF-EVL-EMPTY:
; IF-EVL-NEXT: <x1> vector loop: {
; IF-EVL-NEXT: vector.body:
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%.+]]>
; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add nsw ir<[[LD2]]>, ir<[[LD1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; IF-EVL-NEXT: No successors
; IF-EVL-NEXT: }

; NO-VP: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
; NO-VP-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; NO-VP-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; NO-VP-NEXT: Live-in ir<%N> = original trip-count
; NO-VP-EMPTY:
; NO-VP: vector.ph:
; NO-VP-NEXT: Successor(s): vector loop
; NO-VP-EMPTY:
; NO-VP-NEXT: <x1> vector loop: {
; NO-VP-NEXT: vector.body:
; NO-VP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; NO-VP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
; NO-VP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; NO-VP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
; NO-VP-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; NO-VP-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; NO-VP-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>
; NO-VP-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>
; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
; NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
; NO-VP-NEXT: No successors
; NO-VP-NEXT: }

entry:
  br label %for.body

for.body:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %for.body ]
  ; Read b[idx] first, then c[idx]; the expected plans list the loads in this order.
  %b.addr = getelementptr inbounds i32, ptr %b, i64 %idx
  %b.val = load i32, ptr %b.addr, align 4
  %c.addr = getelementptr inbounds i32, ptr %c, i64 %idx
  %c.val = load i32, ptr %c.addr, align 4
  ; Operand order (c, then b) is what the expected add recipe spells out.
  %sum = add nsw i32 %c.val, %b.val
  %a.addr = getelementptr inbounds i32, ptr %a, i64 %idx
  store i32 %sum, ptr %a.addr, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, %N
  br i1 %done, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; Copies p[i] to p[i + 100] (i64 elements) for i in [0, 512): the store lands
; 100 elements ahead of the load through the same, possibly aliasing, pointer.
; Both invocations share one expected plan here, with plain load / store
; recipes and a VF set that stops at vscale x 2.
; NOTE(review): presumably the dependence distance is what limits the VF set
; and keeps the EVL recipes out of this plan -- confirm against the vectorizer.
define void @safe_dep(ptr %p) {
; CHECK: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
; CHECK-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
; CHECK-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
; CHECK-NEXT: Live-in ir<512> = original trip-count
; CHECK-EMPTY:
; CHECK: vector.ph:
; CHECK-NEXT: Successor(s): vector loop
; CHECK-EMPTY:
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
; CHECK-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1>
; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr ir<%p>, vp<[[ST]]>
; CHECK-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; CHECK-NEXT: WIDEN ir<[[V:%.+]]> = load vp<[[PTR1]]>
; CHECK-NEXT: CLONE ir<[[OFFSET:.+]]> = add vp<[[ST]]>, ir<100>
; CHECK-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr ir<%p>, ir<[[OFFSET]]>
; CHECK-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; CHECK-NEXT: WIDEN store vp<[[PTR2]]>, ir<[[V]]>
; CHECK-NEXT: EMIT vp<[[IV_NEXT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
; CHECK-NEXT: }

entry:
  br label %loop

loop:
  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
  %src = getelementptr i64, ptr %p, i64 %i
  %val = load i64, ptr %src, align 32
  ; Destination index is the source index plus 100.
  %i.plus100 = add i64 %i, 100
  %dst = getelementptr i64, ptr %p, i64 %i.plus100
  store i64 %val, ptr %dst, align 32
  %i.next = add i64 %i, 1
  ; The compare uses the pre-increment index, so the body runs for 0..511.
  %keep.going = icmp ne i64 %i, 511
  br i1 %keep.going, label %loop, label %exit

exit:
  ret void
}