; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; REQUIRES: asserts
; RUN: opt < %s -aa-pipeline=basic-aa -passes=loop-vectorize,instcombine -force-vector-width=2 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Check that the address computation for the stored pointers is widened: the
; vector body should contain a single GEP with a <2 x i64> index that feeds a
; <2 x ptr> store.
define void @vector_gep(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @vector_gep(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <2 x i64> [[VEC_IND]]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    store <2 x ptr> [[TMP0]], ptr [[TMP1]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds nuw ptr, ptr [[A]], i64 [[I]]
; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %var0 = getelementptr inbounds i32, ptr %b, i64 %i
  %var1 = getelementptr inbounds ptr, ptr %a, i64 %i
  store ptr %var0, ptr %var1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; The pointers are stored with a stride of two, so the stores are scalarized
; and the GEPs that compute them should remain scalar as well.
define void @scalar_store(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @scalar_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
; CHECK-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT:    store ptr [[TMP4]], ptr [[TMP6]], align 8
; CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP7]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT:    [[VAR1:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR1]], align 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %var0 = getelementptr inbounds i32, ptr %b, i64 %i
  %var1 = getelementptr inbounds ptr, ptr %a, i64 %i
  store ptr %var0, ptr %var1, align 8
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; Same as @scalar_store, except the stored pointers are computed from an
; i64-typed GEP; the scalarized stores should still get scalar addresses.
define void @expansion(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @expansion(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 3
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
; CHECK-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 2
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT:    store ptr [[TMP4]], ptr [[TMP6]], align 8
; CHECK-NEXT:    store ptr [[TMP5]], ptr [[TMP7]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[I]]
; CHECK-NEXT:    [[VAR3:%.*]] = getelementptr inbounds ptr, ptr [[A]], i64 [[I]]
; CHECK-NEXT:    store ptr [[VAR0]], ptr [[VAR3]], align 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %var0 = getelementptr inbounds i64, ptr %b, i64 %i
  %var3 = getelementptr inbounds ptr, ptr %a, i64 %i
  store ptr %var0, ptr %var3, align 8
  %i.next = add nuw nsw i64 %i, 2
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}

; The stored-to pointers are loaded from memory rather than computed by a GEP,
; so the <2 x ptr> load is kept and each lane is extracted for the scalarized
; stores.
define void @no_gep_or_bitcast(ptr noalias %a, i64 %n) {
; CHECK-LABEL: @no_gep_or_bitcast(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP0]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 0
; CHECK-NEXT:    store i32 0, ptr [[TMP1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i64 1
; CHECK-NEXT:    store i32 0, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds nuw ptr, ptr [[A]], i64 [[I]]
; CHECK-NEXT:    [[VAR1:%.*]] = load ptr, ptr [[VAR0]], align 8
; CHECK-NEXT:    store i32 0, ptr [[VAR1]], align 8
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  %var0 = getelementptr inbounds ptr, ptr %a, i64 %i
  %var1 = load ptr, ptr %var0, align 8
  store i32 0, ptr %var1, align 8
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}