; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S | FileCheck %s
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s -check-prefix=VF2UF2
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s -check-prefix=VF1UF4

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; Make sure a loop is vectorized correctly with fold-tail when the constant
; trip-count is not a multiple of -force-vector-width and/or
; -force-vector-interleave, but is a multiple of the internally computed MaxVF;
; e.g., when all types are i32, which leads to MaxVF=1.
;
; Both loops below have a constant trip count of 14. For every configuration
; the assertions expect a vector loop that advances the index by 4 until it
; reaches 16, with each lane/part guarded by an "icmp ule ..., 13" mask, and a
; middle block that branches unconditionally (br i1 true) to the exit.
;
; NOTE(review): in the VF2UF2 assertions some FileCheck capture names (e.g.
; PRED_STORE_IF2, PRED_STORE_CONTINUE3, ..., PRED_STORE_CONTINUE7) appear to no
; longer correspond to the literal block labels that follow them
; (pred.store.if1, pred.store.continue2, ..., pred.store.continue6). Matching
; is unaffected because the names are only capture variables; re-running
; utils/update_test_checks.py should resync them - confirm before hand-editing.

; Stores the constant 13 to A[riv] for riv = 0..13. The function is marked
; optsize.
define void @pr45679(ptr %A) optsize {
; CHECK-LABEL: @pr45679(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 13)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK:       pred.store.if:
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
; CHECK-NEXT:    store i32 13, ptr [[TMP3]], align 1
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
; CHECK:       pred.store.continue:
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK:       pred.store.if1:
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]]
; CHECK-NEXT:    store i32 13, ptr [[TMP6]], align 1
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; CHECK:       pred.store.continue2:
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK:       pred.store.if3:
; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP8]]
; CHECK-NEXT:    store i32 13, ptr [[TMP9]], align 1
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
; CHECK:       pred.store.continue4:
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; CHECK:       pred.store.if5:
; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
; CHECK-NEXT:    store i32 13, ptr [[TMP12]], align 1
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
; CHECK:       pred.store.continue6:
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
; CHECK-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
; CHECK-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; VF2UF2-LABEL: @pr45679(
; VF2UF2-NEXT:  entry:
; VF2UF2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VF2UF2:       vector.ph:
; VF2UF2-NEXT:    br label [[VECTOR_BODY:%.*]]
; VF2UF2:       vector.body:
; VF2UF2-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
; VF2UF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 13)
; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], splat (i32 13)
; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF2UF2:       pred.store.if:
; VF2UF2-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]]
; VF2UF2-NEXT:    store i32 13, ptr [[TMP4]], align 1
; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VF2UF2:       pred.store.continue:
; VF2UF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; VF2UF2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; VF2UF2:       pred.store.if1:
; VF2UF2-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
; VF2UF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP6]]
; VF2UF2-NEXT:    store i32 13, ptr [[TMP7]], align 1
; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
; VF2UF2:       pred.store.continue2:
; VF2UF2-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; VF2UF2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; VF2UF2:       pred.store.if3:
; VF2UF2-NEXT:    [[TMP9:%.*]] = add i32 [[INDEX]], 2
; VF2UF2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP9]]
; VF2UF2-NEXT:    store i32 13, ptr [[TMP10]], align 1
; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
; VF2UF2:       pred.store.continue4:
; VF2UF2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; VF2UF2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
; VF2UF2:       pred.store.if5:
; VF2UF2-NEXT:    [[TMP12:%.*]] = add i32 [[INDEX]], 3
; VF2UF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP12]]
; VF2UF2-NEXT:    store i32 13, ptr [[TMP13]], align 1
; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
; VF2UF2:       pred.store.continue6:
; VF2UF2-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
; VF2UF2-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; VF2UF2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF2UF2:       middle.block:
; VF2UF2-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; VF2UF2:       scalar.ph:
; VF2UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VF2UF2-NEXT:    br label [[LOOP:%.*]]
; VF2UF2:       loop:
; VF2UF2-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
; VF2UF2-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
; VF2UF2-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; VF2UF2-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
; VF2UF2-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF2UF2:       exit:
; VF2UF2-NEXT:    ret void
;
; VF1UF4-LABEL: @pr45679(
; VF1UF4-NEXT:  entry:
; VF1UF4-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VF1UF4:       vector.ph:
; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
; VF1UF4:       vector.body:
; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; VF1UF4-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
; VF1UF4-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
; VF1UF4-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
; VF1UF4-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 3
; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13
; VF1UF4-NEXT:    [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13
; VF1UF4-NEXT:    [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13
; VF1UF4-NEXT:    [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13
; VF1UF4-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF1UF4:       pred.store.if:
; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
; VF1UF4-NEXT:    store i32 13, ptr [[TMP8]], align 1
; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VF1UF4:       pred.store.continue:
; VF1UF4-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; VF1UF4:       pred.store.if1:
; VF1UF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP1]]
; VF1UF4-NEXT:    store i32 13, ptr [[TMP9]], align 1
; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; VF1UF4:       pred.store.continue2:
; VF1UF4-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; VF1UF4:       pred.store.if3:
; VF1UF4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP2]]
; VF1UF4-NEXT:    store i32 13, ptr [[TMP10]], align 1
; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE4]]
; VF1UF4:       pred.store.continue4:
; VF1UF4-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; VF1UF4:       pred.store.if5:
; VF1UF4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP3]]
; VF1UF4-NEXT:    store i32 13, ptr [[TMP11]], align 1
; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE6]]
; VF1UF4:       pred.store.continue6:
; VF1UF4-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; VF1UF4-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
; VF1UF4-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF1UF4:       middle.block:
; VF1UF4-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; VF1UF4:       scalar.ph:
; VF1UF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VF1UF4-NEXT:    br label [[LOOP:%.*]]
; VF1UF4:       loop:
; VF1UF4-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
; VF1UF4-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
; VF1UF4-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; VF1UF4-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
; VF1UF4-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; VF1UF4:       exit:
; VF1UF4-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %riv
  store i32 13, ptr %arrayidx, align 1
  %rivPlus1 = add nuw nsw i32 %riv, 1
  ; Exit once the incremented induction variable reaches 14, i.e. after 14
  ; iterations - not a multiple of 4.
  %cond = icmp eq i32 %rivPlus1, 14
  br i1 %cond, label %exit, label %loop

exit:
  ret void
}

; Variant with an i64 induction variable and a load: each iteration loads
; a[iv] and stores the loaded value to the loop-invariant address b. Both
; pointers are noalias and the trip count is again the constant 14. The
; assertions expect the load and the store to be emitted together inside each
; predicated pred.store.if block.
define void @load_variant(ptr noalias %a, ptr noalias %b) {
; CHECK-LABEL: @load_variant(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 13)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK:       pred.store.if:
; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
; CHECK-NEXT:    store i64 [[TMP4]], ptr [[B:%.*]], align 8
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
; CHECK:       pred.store.continue:
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK:       pred.store.if1:
; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
; CHECK-NEXT:    store i64 [[TMP9]], ptr [[B]], align 8
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; CHECK:       pred.store.continue2:
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK:       pred.store.if3:
; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
; CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
; CHECK-NEXT:    store i64 [[TMP14]], ptr [[B]], align 8
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
; CHECK:       pred.store.continue4:
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; CHECK:       pred.store.if5:
; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 8
; CHECK-NEXT:    store i64 [[TMP19]], ptr [[B]], align 8
; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
; CHECK:       pred.store.continue6:
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 8
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
; VF2UF2-LABEL: @load_variant(
; VF2UF2-NEXT:  entry:
; VF2UF2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VF2UF2:       vector.ph:
; VF2UF2-NEXT:    br label [[VECTOR_BODY:%.*]]
; VF2UF2:       vector.body:
; VF2UF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
; VF2UF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 13)
; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], splat (i64 13)
; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF2UF2:       pred.store.if:
; VF2UF2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
; VF2UF2-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
; VF2UF2-NEXT:    store i64 [[TMP5]], ptr [[B:%.*]], align 8
; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VF2UF2:       pred.store.continue:
; VF2UF2-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; VF2UF2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
; VF2UF2:       pred.store.if1:
; VF2UF2-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
; VF2UF2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
; VF2UF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
; VF2UF2-NEXT:    store i64 [[TMP10]], ptr [[B]], align 8
; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
; VF2UF2:       pred.store.continue2:
; VF2UF2-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; VF2UF2-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
; VF2UF2:       pred.store.if3:
; VF2UF2-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 2
; VF2UF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
; VF2UF2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8
; VF2UF2-NEXT:    store i64 [[TMP15]], ptr [[B]], align 8
; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
; VF2UF2:       pred.store.continue4:
; VF2UF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; VF2UF2-NEXT:    br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
; VF2UF2:       pred.store.if5:
; VF2UF2-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 3
; VF2UF2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP18]]
; VF2UF2-NEXT:    [[TMP20:%.*]] = load i64, ptr [[TMP19]], align 8
; VF2UF2-NEXT:    store i64 [[TMP20]], ptr [[B]], align 8
; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
; VF2UF2:       pred.store.continue6:
; VF2UF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
; VF2UF2-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; VF2UF2-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF2UF2:       middle.block:
; VF2UF2-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VF2UF2:       scalar.ph:
; VF2UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VF2UF2-NEXT:    br label [[FOR_BODY:%.*]]
; VF2UF2:       for.body:
; VF2UF2-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VF2UF2-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VF2UF2-NEXT:    store i64 [[V]], ptr [[B]], align 8
; VF2UF2-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VF2UF2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
; VF2UF2-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF2UF2:       for.end:
; VF2UF2-NEXT:    ret void
;
; VF1UF4-LABEL: @load_variant(
; VF1UF4-NEXT:  entry:
; VF1UF4-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VF1UF4:       vector.ph:
; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
; VF1UF4:       vector.body:
; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; VF1UF4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF1UF4-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
; VF1UF4-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
; VF1UF4-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13
; VF1UF4-NEXT:    [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13
; VF1UF4-NEXT:    [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13
; VF1UF4-NEXT:    [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13
; VF1UF4-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; VF1UF4:       pred.store.if:
; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; VF1UF4-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
; VF1UF4-NEXT:    store i64 [[TMP9]], ptr [[B:%.*]], align 8
; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
; VF1UF4:       pred.store.continue:
; VF1UF4-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; VF1UF4:       pred.store.if1:
; VF1UF4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VF1UF4-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
; VF1UF4-NEXT:    store i64 [[TMP12]], ptr [[B]], align 8
; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE2]]
; VF1UF4:       pred.store.continue2:
; VF1UF4-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; VF1UF4:       pred.store.if3:
; VF1UF4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
; VF1UF4-NEXT:    [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8
; VF1UF4-NEXT:    store i64 [[TMP15]], ptr [[B]], align 8
; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE4]]
; VF1UF4:       pred.store.continue4:
; VF1UF4-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; VF1UF4:       pred.store.if5:
; VF1UF4-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
; VF1UF4-NEXT:    [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8
; VF1UF4-NEXT:    store i64 [[TMP18]], ptr [[B]], align 8
; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE6]]
; VF1UF4:       pred.store.continue6:
; VF1UF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF1UF4-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
; VF1UF4-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF1UF4:       middle.block:
; VF1UF4-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VF1UF4:       scalar.ph:
; VF1UF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VF1UF4-NEXT:    br label [[FOR_BODY:%.*]]
; VF1UF4:       for.body:
; VF1UF4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VF1UF4-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VF1UF4-NEXT:    store i64 [[V]], ptr [[B]], align 8
; VF1UF4-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VF1UF4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
; VF1UF4-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VF1UF4:       for.end:
; VF1UF4-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %v = load i64, ptr %arrayidx
  ; The store address %b does not depend on %iv, so every iteration writes the
  ; same location.
  store i64 %v, ptr %b
  %iv.next = add nuw nsw i64 %iv, 1
  ; Exit once the incremented induction variable reaches 14 (14 iterations).
  %exitcond.not = icmp eq i64 %iv.next, 14
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}