; REQUIRES: asserts
; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine -enable-new-pm=0 2>&1 | FileCheck %s
; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-vectorize,instcombine -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; CHECK-LABEL: vector_gep
; CHECK-NOT: LV: Found scalar instruction: %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* %b, <2 x i64> [[VEC_IND]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32** [[TMP2]] to <2 x i32*>*
; CHECK-NEXT: store <2 x i32*> [[TMP1]], <2 x i32*>* [[TMP3]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @vector_gep(i32** %a, i32 *%b, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
  %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
  store i32* %tmp0, i32** %tmp1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: scalar_store
; CHECK: LV: Found scalar instruction: %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
; CHECK-NEXT: LV: Found scalar instruction: %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
; CHECK-NEXT: LV: Found scalar instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NEXT: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* %b, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* %b, i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[TMP4]]
; CHECK-NEXT: store i32* [[TMP5]], i32** [[TMP7]], align 8
; CHECK-NEXT: store i32* [[TMP6]], i32** [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @scalar_store(i32** %a, i32 *%b, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i32, i32* %b, i64 %i
  %tmp1 = getelementptr inbounds i32*, i32** %a, i64 %i
  store i32* %tmp0, i32** %tmp1, align 8
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: expansion
; CHECK: LV: Found scalar instruction: %tmp3 = getelementptr inbounds i32*, i32** %tmp2, i64 %i
; CHECK-NEXT: LV: Found scalar instruction: %tmp1 = bitcast i64* %tmp0 to i32*
; CHECK-NEXT: LV: Found scalar instruction: %tmp2 = getelementptr inbounds i32*, i32** %a, i64 0
; CHECK-NEXT: LV: Found scalar instruction: %tmp0 = getelementptr inbounds i64, i64* %b, i64 %i
; CHECK-NEXT: LV: Found scalar instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NEXT: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, i64* %b, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, i64* %b, i64 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32** [[TMP7]] to i64**
; CHECK-NEXT: store i64* [[TMP5]], i64** [[TMP9]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32** [[TMP8]] to i64**
; CHECK-NEXT: store i64* [[TMP6]], i64** [[TMP10]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @expansion(i32** %a, i64 *%b, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i64, i64* %b, i64 %i
  %tmp1 = bitcast i64* %tmp0 to i32*
  %tmp2 = getelementptr inbounds i32*, i32** %a, i64 0
  %tmp3 = getelementptr inbounds i32*, i32** %tmp2, i64 %i
  store i32* %tmp1, i32** %tmp3, align 8
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; CHECK-LABEL: no_gep_or_bitcast
; CHECK-NOT: LV: Found scalar instruction: %tmp1 = load i32*, i32** %tmp0, align 8
; CHECK: LV: Found scalar instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; CHECK-NEXT: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 1
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32*, i32** %a, i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32** [[TMP1]] to <2 x i32*>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32*>, <2 x i32*>* [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32*> [[WIDE_LOAD]], i32 0
; CHECK-NEXT: store i32 0, i32* [[TMP3]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32*> [[WIDE_LOAD]], i32 1
; CHECK-NEXT: store i32 0, i32* [[TMP4]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body
;
define void @no_gep_or_bitcast(i32** noalias %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %tmp0 = getelementptr inbounds i32*, i32** %a, i64 %i
  %tmp1 = load i32*, i32** %tmp0, align 8
  store i32 0, i32* %tmp1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
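
; For reference, the loops above roughly correspond to the following C. This
; is an illustrative sketch only: it is not necessarily the source these tests
; were reduced from, and it glosses over the fact that the IR loops are
; bottom-tested (they always execute at least one iteration). The trailing
; comments restate what the CHECK lines verify for each case.
;
;   void vector_gep(int **a, int *b, long n) {
;     for (long i = 0; i < n; ++i)
;       a[i] = &b[i];          // &b[i] stays a widened (vector) GEP
;   }
;
;   void scalar_store(int **a, int *b, long n) {
;     for (long i = 0; i < n; i += 2)
;       a[i] = &b[i];          // stride-2 store is scalarized, and so are its GEPs
;   }
;
;   void expansion(int **a, long *b, long n) {
;     for (long i = 0; i < n; i += 2)
;       a[i] = (int *)&b[i];   // scalarization expands through the bitcast as well
;   }
;
;   void no_gep_or_bitcast(int **a, long n) {
;     for (long i = 0; i < n; ++i)
;       *a[i] = 0;             // loaded pointer is not scalarized; the load stays wide
;   }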