; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -S -o - %s | FileCheck %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Check that subsets of the load/store chains are vectorized in the presence
; of interleaved loads/stores.
;
; Every function takes a single `noalias` pointer and accesses consecutive
; i32 elements of it: a group of loads, then a group of stores of 0, then a
; second group of loads. The patterns above each function list the vector
; and scalar accesses expected in the output, in order.

; Loads of elements 1 and 0, stores to elements 1 and 0, then loads of
; elements 1 and 2. Each of the three groups is expected to become a single
; <2 x i32> access.
; CHECK-LABEL: @interleave_2L_2S(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load <2 x i32>
define void @interleave_2L_2S(ptr noalias %ptr) {
  %next.gep1 = getelementptr i32, ptr %ptr, i64 1
  %next.gep2 = getelementptr i32, ptr %ptr, i64 2

  %l1 = load i32, ptr %next.gep1, align 4
  %l2 = load i32, ptr %ptr, align 4
  store i32 0, ptr %next.gep1, align 4
  store i32 0, ptr %ptr, align 4
  %l3 = load i32, ptr %next.gep1, align 4
  %l4 = load i32, ptr %next.gep2, align 4

  ret void
}

; Same accesses as @interleave_2L_2S, except that the first two loads are
; issued in address order (element 0, then element 1). The expected output
; is the same three <2 x i32> accesses.
; NOTE(review): the name suggests 3 loads / 2 stores / 1 load, but the body
; contains 2 loads / 2 stores / 2 loads -- confirm this is intended.
; CHECK-LABEL: @interleave_3L_2S_1L(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load <2 x i32>

define void @interleave_3L_2S_1L(ptr noalias %ptr) {
  %next.gep1 = getelementptr i32, ptr %ptr, i64 1
  %next.gep2 = getelementptr i32, ptr %ptr, i64 2

  %l2 = load i32, ptr %ptr, align 4
  %l1 = load i32, ptr %next.gep1, align 4
  store i32 0, ptr %next.gep1, align 4
  store i32 0, ptr %ptr, align 4
  %l3 = load i32, ptr %next.gep1, align 4
  %l4 = load i32, ptr %next.gep2, align 4

  ret void
}

; A single load of element 0 precedes the stores to elements 1 and 0; loads
; of elements 1 and 2 follow. The leading load is expected to stay scalar,
; while the stores and the trailing loads each become a <2 x i32> access.
; CHECK-LABEL: @chain_suffix(
; CHECK: load i32
; CHECK: store <2 x i32>
; CHECK: load <2 x i32>
define void @chain_suffix(ptr noalias %ptr) {
  %next.gep1 = getelementptr i32, ptr %ptr, i64 1
  %next.gep2 = getelementptr i32, ptr %ptr, i64 2

  %l2 = load i32, ptr %ptr, align 4
  store i32 0, ptr %next.gep1, align 4
  store i32 0, ptr %ptr, align 4
  %l3 = load i32, ptr %next.gep1, align 4
  %l4 = load i32, ptr %next.gep2, align 4

  ret void
}


; Loads of elements 0 and 1, stores to elements 1 and 2, then loads of
; elements 1, 2 and 3. Expected: a <2 x i32> load, a <2 x i32> store and a
; <3 x i32> load.
; CHECK-LABEL: @chain_prefix_suffix(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32>
; CHECK: load <3 x i32>
define void @chain_prefix_suffix(ptr noalias %ptr) {
  %next.gep1 = getelementptr i32, ptr %ptr, i64 1
  %next.gep2 = getelementptr i32, ptr %ptr, i64 2
  %next.gep3 = getelementptr i32, ptr %ptr, i64 3

  %l1 = load i32, ptr %ptr, align 4
  %l2 = load i32, ptr %next.gep1, align 4
  store i32 0, ptr %next.gep1, align 4
  store i32 0, ptr %next.gep2, align 4
  %l3 = load i32, ptr %next.gep1, align 4
  %l4 = load i32, ptr %next.gep2, align 4
  %l5 = load i32, ptr %next.gep3, align 4

  ret void
}

; Loads of elements 1 and 0, stores to elements 1 and 0, then loads of
; elements 1, 2, 3, 4 and 4 again (all align 4). Expected: a <2 x i32> load,
; a <2 x i32> store of zeroinitializer, a <3 x i32> load and two remaining
; scalar loads.
; The label pattern ends in '(' so that it cannot also match the
; @interleave_get_longest_aligned definition, of which it is a prefix.
; CHECK-LABEL: @interleave_get_longest(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32> zeroinitializer
; CHECK: load <3 x i32>
; CHECK: load i32
; CHECK: load i32

define void @interleave_get_longest(ptr noalias %ptr) {
  %tmp2 = getelementptr i32, ptr %ptr, i64 1
  %tmp3 = getelementptr i32, ptr %ptr, i64 2
  %tmp4 = getelementptr i32, ptr %ptr, i64 3
  %tmp5 = getelementptr i32, ptr %ptr, i64 4

  %l1 = load i32, ptr %tmp2, align 4
  %l2 = load i32, ptr %ptr, align 4
  store i32 0, ptr %tmp2, align 4
  store i32 0, ptr %ptr, align 4
  %l3 = load i32, ptr %tmp2, align 4
  %l4 = load i32, ptr %tmp3, align 4
  %l5 = load i32, ptr %tmp4, align 4
  %l6 = load i32, ptr %tmp5, align 4
  %l7 = load i32, ptr %tmp5, align 4

  ret void
}

; Same accesses as @interleave_get_longest, except that the load of element 1
; after the stores is marked align 16 and the load of element 3 is marked
; align 8. Here the trailing loads are expected to produce a <4 x i32> load.
; CHECK-LABEL: @interleave_get_longest_aligned(
; CHECK: load <2 x i32>
; CHECK: store <2 x i32> zeroinitializer
; CHECK: load <4 x i32>

define void @interleave_get_longest_aligned(ptr noalias %ptr) {
  %tmp2 = getelementptr i32, ptr %ptr, i64 1
  %tmp3 = getelementptr i32, ptr %ptr, i64 2
  %tmp4 = getelementptr i32, ptr %ptr, i64 3
  %tmp5 = getelementptr i32, ptr %ptr, i64 4

  %l1 = load i32, ptr %tmp2, align 4
  %l2 = load i32, ptr %ptr, align 4
  store i32 0, ptr %tmp2, align 4
  store i32 0, ptr %ptr, align 4
  %l3 = load i32, ptr %tmp2, align 16
  %l4 = load i32, ptr %tmp3, align 4
  %l5 = load i32, ptr %tmp4, align 8
  %l6 = load i32, ptr %tmp5, align 4
  %l7 = load i32, ptr %tmp5, align 4

  ret void
}