; The first invocation schedules codegenprepare and the vectorizer in ONE
; pipeline string. opt takes a single pipeline description, so spelling the
; option twice does not chain the passes and codegenprepare would not run
; ahead of the vectorizer.
; RUN: opt -passes='require<profile-summary>,function(codegenprepare,load-store-vectorizer)' %s -S -o - | FileCheck %s
; RUN: opt -passes=load-store-vectorizer %s -S -o - | FileCheck %s
; RUN: opt -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' %s -S -o - | FileCheck %s

; Checks that LoadStoreVectorizer merges two adjacent i32 loads into a single
; <2 x i32> load, both when the addresses are expressed as structured GEPs
; (@test) and when they are expressed as the byte-offset arithmetic that
; codegenprepare emits (@test.codegenprepared).

target triple = "x86_64--"

; The inner struct holds two [4 x [4 x [4 x [16 x float]]]] arrays followed by
; a smaller array. Each of the big arrays is 4*4*4*16 floats = 4096 bytes, so
; field 1 of the inner struct starts 4096 bytes into the object.
%union = type { { [4 x [4 x [4 x [16 x float]]]], [4 x [4 x [4 x [16 x float]]]], [10 x [10 x [4 x float]]] } }

@global_pointer = external unnamed_addr global { %union, [2000 x i8] }, align 4

; Addresses are computed with GEPs in the entry block while the loads live in
; a separate block.
; Function Attrs: convergent nounwind
define void @test(i32 %base) #0 {
; CHECK-LABEL: @test(
; CHECK-NOT: load i32
; CHECK: load <2 x i32>
; CHECK-NOT: load i32
entry:
  ; Clear the two low bits of %base; the two element indices below are this
  ; value plus 4 and plus 5, i.e. consecutive elements.
  %mul331 = and i32 %base, -4
  %add350.4 = add i32 4, %mul331
  %idx351.4 = zext i32 %add350.4 to i64
  ; Element %idx351.4 of the innermost array in field 1 of the inner struct.
  %arrayidx352.4 = getelementptr inbounds { %union, [2000 x i8] }, ptr @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.4
  %add350.5 = add i32 5, %mul331
  %idx351.5 = zext i32 %add350.5 to i64
  ; The next element (%idx351.5) of the same innermost array.
  %arrayidx352.5 = getelementptr inbounds { %union, [2000 x i8] }, ptr @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.5
  %cnd = icmp ult i32 %base, 1000
  br i1 %cnd, label %loads, label %exit

loads:
  ; If and only if the loads are in a different BB from the GEPs, codegenprepare
  ; would try to turn the GEPs into math, which makes LoadStoreVectorizer's job
  ; harder.
  %tmp297.4 = load i32, ptr %arrayidx352.4, align 4, !tbaa !0
  %tmp297.5 = load i32, ptr %arrayidx352.5, align 4, !tbaa !0
  br label %exit

exit:
  ret void
}

; Same computation as @test, but with the addresses already written in the
; form codegenprepare produces: an i8 GEP by (index * 4) followed by an i8 GEP
; by the constant 4096-byte field offset, both placed next to the loads.
; Function Attrs: convergent nounwind
define void @test.codegenprepared(i32 %base) #0 {
; CHECK-LABEL: @test.codegenprepared(
; CHECK-NOT: load i32
; CHECK: load <2 x i32>
; CHECK-NOT: load i32
entry:
  %mul331 = and i32 %base, -4
  %add350.4 = add i32 4, %mul331
  %idx351.4 = zext i32 %add350.4 to i64
  %add350.5 = add i32 5, %mul331
  %idx351.5 = zext i32 %add350.5 to i64
  %cnd = icmp ult i32 %base, 1000
  br i1 %cnd, label %loads, label %exit

loads:                                            ; preds = %entry
  ; Byte address of element %idx351.4: base + idx * 4 + 4096.
  %sunkaddr = mul i64 %idx351.4, 4
  %sunkaddr1 = getelementptr inbounds i8, ptr @global_pointer, i64 %sunkaddr
  %sunkaddr2 = getelementptr inbounds i8, ptr %sunkaddr1, i64 4096
  %tmp297.4 = load i32, ptr %sunkaddr2, align 4, !tbaa !0
  ; Byte address of element %idx351.5: base + idx * 4 + 4096.
  %sunkaddr3 = mul i64 %idx351.5, 4
  %sunkaddr4 = getelementptr inbounds i8, ptr @global_pointer, i64 %sunkaddr3
  %sunkaddr5 = getelementptr inbounds i8, ptr %sunkaddr4, i64 4096
  %tmp297.5 = load i32, ptr %sunkaddr5, align 4, !tbaa !0
  br label %exit

exit:                                             ; preds = %loads, %entry
  ret void
}

attributes #0 = { convergent nounwind }

; TBAA access tag (!0) for the scalar type "float", rooted at "Simple C++ TBAA".
!0 = !{!1, !1, i64 0}
!1 = !{!"float", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C++ TBAA"}