; Loop-vectorizer test: with -vectorizer-maximize-bandwidth the chosen
; vectorization factor (VF) is read from the -debug-only=loop-vectorize output.
; Each invocation enables the plain CHECK prefix in addition to its
; CPU-specific prefix, so that the per-function label directives in this file
; are actually evaluated and every VF check is scoped to its own function.
;
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -mcpu=corei7-avx -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX1
; RUN: opt -passes=loop-vectorize -vectorizer-maximize-bandwidth -mcpu=core-avx2 -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX2
; REQUIRES: asserts

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; 1000-element i8 arrays (the narrow-type operands and result used by @foo).
@a = global [1000 x i8] zeroinitializer, align 16
@b = global [1000 x i8] zeroinitializer, align 16
@c = global [1000 x i8] zeroinitializer, align 16
; 1000-element i32 arrays (the wide-type operands and result used by @foo).
@u = global [1000 x i32] zeroinitializer, align 16
@v = global [1000 x i32] zeroinitializer, align 16
@w = global [1000 x i32] zeroinitializer, align 16

; Tests that the vectorization factor is determined by the smallest instead of
; widest type in the loop for maximum bandwidth when
; -vectorizer-maximize-bandwidth is indicated.
;
; CHECK-LABEL: foo
; CHECK-AVX1: LV: Selecting VF: 16.
; CHECK-AVX2: LV: Selecting VF: 32.
; Single loop of 1000 iterations that mixes two element widths:
;   a[i] = b[i] + c[i]   on i8  (the smallest type in the loop)
;   u[i] = v[i] + w[i]   on i32 (the widest type in the loop)
define void @foo() {
entry:
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; i8 stream: a[i] = c[i] + b[i]
  %arrayidx = getelementptr inbounds [1000 x i8], ptr @b, i64 0, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx, align 1
  %arrayidx2 = getelementptr inbounds [1000 x i8], ptr @c, i64 0, i64 %indvars.iv
  %1 = load i8, ptr %arrayidx2, align 1
  %add = add i8 %1, %0
  %arrayidx6 = getelementptr inbounds [1000 x i8], ptr @a, i64 0, i64 %indvars.iv
  store i8 %add, ptr %arrayidx6, align 1
  ; i32 stream: u[i] = w[i] + v[i]
  %arrayidx8 = getelementptr inbounds [1000 x i32], ptr @v, i64 0, i64 %indvars.iv
  %2 = load i32, ptr %arrayidx8, align 4
  %arrayidx10 = getelementptr inbounds [1000 x i32], ptr @w, i64 0, i64 %indvars.iv
  %3 = load i32, ptr %arrayidx10, align 4
  %add11 = add nsw i32 %3, %2
  %arrayidx13 = getelementptr inbounds [1000 x i32], ptr @u, i64 0, i64 %indvars.iv
  store i32 %add11, ptr %arrayidx13, align 4
  ; Induction step; the loop exits once the incremented index reaches 1000.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; We should not choose a VF larger than the constant TC.
; VF chosen should be at most 16 (not the max possible vector width = 32 for AVX2)
define void @not_too_small_tc(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: not_too_small_tc
; CHECK-AVX2: LV: Selecting VF: 16.
entry:
  br label %for.body

for.body:
  ; A[i] = B[i] + A[i] on i8; every memory access is tagged with access group !13.
  ; NOTE(review): these accesses are byte-indexed yet declare align 4 - confirm
  ; that the over-stated alignment is intentional for this test.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %B, i64 %indvars.iv
  %l1 = load i8, ptr %arrayidx, align 4, !llvm.access.group !13
  %arrayidx2 = getelementptr inbounds i8, ptr %A, i64 %indvars.iv
  %l2 = load i8, ptr %arrayidx2, align 4, !llvm.access.group !13
  %add = add i8 %l1, %l2
  store i8 %add, ptr %arrayidx2, align 4, !llvm.access.group !13
  ; Constant trip count: the loop exits once the incremented index reaches 16.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 16
  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

for.end:
  ret void
}

; NOTE(review): !3 (the llvm.loop.parallel_accesses node naming access group
; !13) is not referenced by any loop in this file; the loop above attaches !4,
; which carries no properties. Confirm whether !llvm.loop was meant to be !3.
!3 = !{!3, !{!"llvm.loop.parallel_accesses", !13}}
!4 = !{!4}
!13 = distinct !{}