; REQUIRES: asserts
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW

; Test that the MaxVF for the following loop, which has no loop-carried
; dependences, is calculated as vscale x 4 (max legal SVE vector size) or
; vscale x 16 (maximized bandwidth for i8 in the loop).
define void @test0(ptr %a, ptr %b, ptr %c) #0 {
; CHECK: LV: Checking a loop in 'test0'
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16
;
; Loop body: a[i] = zext(b[i]) + c[i] for i = 0..1023, where %a and %c are
; indexed as i32 and %b as i8. Nothing stored is read back in the loop.
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %c, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
  ; %b advances one byte per iteration, so only byte alignment holds for
  ; every %iv; a larger 'align' would overstate it.
  %1 = load i8, ptr %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as (maxvscale = 16) * 4.
define void @test1(ptr %a, ptr %b) #0 {
; CHECK: LV: Checking a loop in 'test1'
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
;
; Loop body: a[i + 64] = zext(b[i]) + a[i] for i = 0..1023. The store to %a
; runs 64 i32 elements ahead of the load from %a.
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
  ; %b advances one byte per iteration, so only byte alignment holds for
  ; every %iv; a larger 'align' would overstate it.
  %1 = load i8, ptr %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index is %iv + 64.
  %2 = add nuw nsw i64 %iv, 64
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as (maxvscale = 16) * 2.
define void @test2(ptr %a, ptr %b) #0 {
; CHECK: LV: Checking a loop in 'test2'
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
;
; Loop body: a[i + 32] = zext(b[i]) + a[i] for i = 0..1023. The store to %a
; runs 32 i32 elements ahead of the load from %a.
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
  ; %b advances one byte per iteration, so only byte alignment holds for
  ; every %iv; a larger 'align' would overstate it.
  %1 = load i8, ptr %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index is %iv + 32.
  %2 = add nuw nsw i64 %iv, 32
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as (maxvscale = 16) * 1.
define void @test3(ptr %a, ptr %b) #0 {
; CHECK: LV: Checking a loop in 'test3'
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
;
; Loop body: a[i + 16] = zext(b[i]) + a[i] for i = 0..1023. The store to %a
; runs 16 i32 elements ahead of the load from %a.
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %iv
  ; %b advances one byte per iteration, so only byte alignment holds for
  ; every %iv; a larger 'align' would overstate it.
  %1 = load i8, ptr %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index is %iv + 16.
  %2 = add nuw nsw i64 %iv, 16
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance.
define void @test4(ptr %a, ptr %b) #0 {
; CHECK: LV: Checking a loop in 'test4'
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 4
;
; Loop body: a[i + 8] = b[i] + a[i] for i = 0..1023, all accesses i32. The
; store to %a runs 8 i32 elements ahead of the load from %a.
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  ; Store index is %iv + 8.
  %2 = add nuw nsw i64 %iv, 8
  %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 %2
  store i32 %add, ptr %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

attributes #0 = { vscale_range(1, 16) }