; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 -S | FileCheck %s --check-prefix VF-TWO-CHECK
; RUN: opt < %s -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S | FileCheck %s --check-prefix VF-FOUR-CHECK

; Exercises epilogue vectorization in the loop-vectorize pass. The two opt
; invocations above enable epilogue vectorization and force the epilogue
; vectorization factor to 2 and to 4 respectively; each run is verified under
; its own FileCheck prefix (VF-TWO-CHECK and VF-FOUR-CHECK).

target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"

; f1: element-wise float addition, aa[i] = bb[i] + cc[i] for i in [0, N).
; The loop is skipped entirely unless N > 0.
;
; What the assertions below pin down for both prefixes:
;   * a main vector loop that advances the index by 32 per iteration, using
;     eight <4 x float> loads from bb, eight from cc, eight fadds and eight
;     stores to aa;
;   * a vector epilogue loop for the remainder, operating on <2 x float>
;     (forced VF=2) or <4 x float> (forced VF=4) and advancing by 2 or 4;
;   * the original scalar loop (for.body) for any iterations still left.
; The assertion lines are generated by the script named in the NOTE line at the
; top of the file.
; Function Attrs: nounwind
define dso_local void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) #0 {
; VF-TWO-CHECK-LABEL: define dso_local void @f1(
; VF-TWO-CHECK-SAME: ptr noalias [[AA:%.*]], ptr noalias [[BB:%.*]], ptr noalias [[CC:%.*]], i32 signext [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; VF-TWO-CHECK-NEXT: entry:
; VF-TWO-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 0
; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
; VF-TWO-CHECK: iter.check:
; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-TWO-CHECK: vector.main.loop.iter.check:
; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; VF-TWO-CHECK: vector.ph:
; VF-TWO-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32
; VF-TWO-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; VF-TWO-CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; VF-TWO-CHECK: vector.body:
; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[TMP0]]
; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 4
; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 8
; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 12
; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 16
; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 20
; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 24
; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 28
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP18]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP19]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP20]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP21]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP23]], align 4
; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[TMP0]]
; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 4
; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 8
; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 12
; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 16
; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 20
; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 24
; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 28
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP32]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP33]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP35]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, ptr [[TMP36]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP37]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, ptr [[TMP38]], align 4
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, ptr [[TMP39]], align 4
; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD9]]
; VF-TWO-CHECK-NEXT: [[TMP41:%.*]] = fadd fast <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD10]]
; VF-TWO-CHECK-NEXT: [[TMP42:%.*]] = fadd fast <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD11]]
; VF-TWO-CHECK-NEXT: [[TMP43:%.*]] = fadd fast <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD12]]
; VF-TWO-CHECK-NEXT: [[TMP44:%.*]] = fadd fast <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD13]]
; VF-TWO-CHECK-NEXT: [[TMP45:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD14]]
; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD15]]
; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD16]]
; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[TMP0]]
; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0
; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 4
; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 8
; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 12
; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 16
; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 20
; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 24
; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 28
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP40]], ptr [[TMP56]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP41]], ptr [[TMP57]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP42]], ptr [[TMP58]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP43]], ptr [[TMP59]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP44]], ptr [[TMP60]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP45]], ptr [[TMP61]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP46]], ptr [[TMP62]], align 4
; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP47]], ptr [[TMP63]], align 4
; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF-TWO-CHECK: middle.block:
; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; VF-TWO-CHECK: vec.epilog.iter.check:
; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; VF-TWO-CHECK: vec.epilog.ph:
; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; VF-TWO-CHECK-NEXT: [[N_MOD_VF17:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
; VF-TWO-CHECK-NEXT: [[N_VEC18:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF17]]
; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; VF-TWO-CHECK: vec.epilog.vector.body:
; VF-TWO-CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX20]], 0
; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[TMP65]]
; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 0
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <2 x float>, ptr [[TMP67]], align 4
; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[TMP65]]
; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 0
; VF-TWO-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <2 x float>, ptr [[TMP69]], align 4
; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = fadd fast <2 x float> [[WIDE_LOAD21]], [[WIDE_LOAD22]]
; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[TMP65]]
; VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, ptr [[TMP71]], i32 0
; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP70]], ptr [[TMP72]], align 4
; VF-TWO-CHECK-NEXT: [[INDEX_NEXT23]] = add nuw i64 [[INDEX20]], 2
; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = icmp eq i64 [[INDEX_NEXT23]], [[N_VEC18]]
; VF-TWO-CHECK-NEXT: br i1 [[TMP73]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; VF-TWO-CHECK: vec.epilog.middle.block:
; VF-TWO-CHECK-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC18]]
; VF-TWO-CHECK-NEXT: br i1 [[CMP_N19]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; VF-TWO-CHECK: vec.epilog.scalar.ph:
; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]]
; VF-TWO-CHECK: for.body:
; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDVARS_IV]]
; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; VF-TWO-CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDVARS_IV]]
; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; VF-TWO-CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP74]], [[TMP75]]
; VF-TWO-CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDVARS_IV]]
; VF-TWO-CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX4]], align 4
; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]]
; VF-TWO-CHECK: for.end.loopexit:
; VF-TWO-CHECK-NEXT: br label [[FOR_END]]
; VF-TWO-CHECK: for.end:
; VF-TWO-CHECK-NEXT: ret void
;
; VF-FOUR-CHECK-LABEL: define dso_local void @f1(
; VF-FOUR-CHECK-SAME: ptr noalias [[AA:%.*]], ptr noalias [[BB:%.*]], ptr noalias [[CC:%.*]], i32 signext [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; VF-FOUR-CHECK-NEXT: entry:
; VF-FOUR-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 0
; VF-FOUR-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
; VF-FOUR-CHECK: iter.check:
; VF-FOUR-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
; VF-FOUR-CHECK: vector.main.loop.iter.check:
; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32
; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; VF-FOUR-CHECK: vector.ph:
; VF-FOUR-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32
; VF-FOUR-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
; VF-FOUR-CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; VF-FOUR-CHECK: vector.body:
; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF-FOUR-CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[TMP0]]
; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 4
; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 8
; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 12
; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 16
; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 20
; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 24
; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 28
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP18]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP19]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP20]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP21]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP23]], align 4
; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[TMP0]]
; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 4
; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 8
; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 12
; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 16
; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 20
; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 24
; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 28
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP32]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP33]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP35]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x float>, ptr [[TMP36]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP37]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x float>, ptr [[TMP38]], align 4
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD16:%.*]] = load <4 x float>, ptr [[TMP39]], align 4
; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD9]]
; VF-FOUR-CHECK-NEXT: [[TMP41:%.*]] = fadd fast <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD10]]
; VF-FOUR-CHECK-NEXT: [[TMP42:%.*]] = fadd fast <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD11]]
; VF-FOUR-CHECK-NEXT: [[TMP43:%.*]] = fadd fast <4 x float> [[WIDE_LOAD4]], [[WIDE_LOAD12]]
; VF-FOUR-CHECK-NEXT: [[TMP44:%.*]] = fadd fast <4 x float> [[WIDE_LOAD5]], [[WIDE_LOAD13]]
; VF-FOUR-CHECK-NEXT: [[TMP45:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD14]]
; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD15]]
; VF-FOUR-CHECK-NEXT: [[TMP47:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD16]]
; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[TMP0]]
; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0
; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 4
; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 8
; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 12
; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 16
; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 20
; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 24
; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 28
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP40]], ptr [[TMP56]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP41]], ptr [[TMP57]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP42]], ptr [[TMP58]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP43]], ptr [[TMP59]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP44]], ptr [[TMP60]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP45]], ptr [[TMP61]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP46]], ptr [[TMP62]], align 4
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP47]], ptr [[TMP63]], align 4
; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VF-FOUR-CHECK: middle.block:
; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; VF-FOUR-CHECK: vec.epilog.iter.check:
; VF-FOUR-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; VF-FOUR-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
; VF-FOUR-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
; VF-FOUR-CHECK: vec.epilog.ph:
; VF-FOUR-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
; VF-FOUR-CHECK-NEXT: [[N_MOD_VF17:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
; VF-FOUR-CHECK-NEXT: [[N_VEC18:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF17]]
; VF-FOUR-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; VF-FOUR-CHECK: vec.epilog.vector.body:
; VF-FOUR-CHECK-NEXT: [[INDEX20:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = add i64 [[INDEX20]], 0
; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[TMP65]]
; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 0
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD21:%.*]] = load <4 x float>, ptr [[TMP67]], align 4
; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[TMP65]]
; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 0
; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD22:%.*]] = load <4 x float>, ptr [[TMP69]], align 4
; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = fadd fast <4 x float> [[WIDE_LOAD21]], [[WIDE_LOAD22]]
; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[TMP65]]
; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds float, ptr [[TMP71]], i32 0
; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP70]], ptr [[TMP72]], align 4
; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT23]] = add nuw i64 [[INDEX20]], 4
; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = icmp eq i64 [[INDEX_NEXT23]], [[N_VEC18]]
; VF-FOUR-CHECK-NEXT: br i1 [[TMP73]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; VF-FOUR-CHECK: vec.epilog.middle.block:
; VF-FOUR-CHECK-NEXT: [[CMP_N19:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC18]]
; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N19]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; VF-FOUR-CHECK: vec.epilog.scalar.ph:
; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]]
; VF-FOUR-CHECK: for.body:
; VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; VF-FOUR-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDVARS_IV]]
; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; VF-FOUR-CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDVARS_IV]]
; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; VF-FOUR-CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP74]], [[TMP75]]
; VF-FOUR-CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDVARS_IV]]
; VF-FOUR-CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX4]], align 4
; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]]
; VF-FOUR-CHECK: for.end.loopexit:
; VF-FOUR-CHECK-NEXT: br label [[FOR_END]]
; VF-FOUR-CHECK: for.end:
; VF-FOUR-CHECK-NEXT: ret void
;


entry:
  ; Guard: only enter the loop when N > 0.
  %cmp1 = icmp sgt i32 %N, 0
  br i1 %cmp1, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  ; Trip count is N zero-extended to i64.
  %wide.trip.count = zext i32 %N to i64
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  ; aa[i] = bb[i] + cc[i], using a fast-math fadd.
  %arrayidx = getelementptr inbounds float, ptr %bb, i64 %indvars.iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %cc, i64 %indvars.iv
  %1 = load float, ptr %arrayidx2, align 4
  %add = fadd fast float %0, %1
  %arrayidx4 = getelementptr inbounds float, ptr %aa, i64 %indvars.iv
  store float %add, ptr %arrayidx4, align 4
  ; Advance the induction variable and loop until it reaches the trip count.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) #0 {
; VF-TWO-CHECK-LABEL: define dso_local signext i32 @f2(
; VF-TWO-CHECK-SAME: ptr noalias 
[[A:%.*]], ptr noalias [[B:%.*]], i32 signext [[N:%.*]]) #[[ATTR0]] { 307; VF-TWO-CHECK-NEXT: entry: 308; VF-TWO-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 1 309; VF-TWO-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]] 310; VF-TWO-CHECK: iter.check: 311; VF-TWO-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 312; VF-TWO-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 313; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2 314; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 315; VF-TWO-CHECK: vector.scevcheck: 316; VF-TWO-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 317; VF-TWO-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 318; VF-TWO-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]]) 319; VF-TWO-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 320; VF-TWO-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 321; VF-TWO-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] 322; VF-TWO-CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]] 323; VF-TWO-CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] 324; VF-TWO-CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 325; VF-TWO-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] 326; VF-TWO-CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 327; VF-TWO-CHECK: vector.main.loop.iter.check: 328; VF-TWO-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 329; VF-TWO-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 330; VF-TWO-CHECK: vector.ph: 331; VF-TWO-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32 332; VF-TWO-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 333; VF-TWO-CHECK-NEXT: br label 
[[VECTOR_BODY:%.*]] 334; VF-TWO-CHECK: vector.body: 335; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 336; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 337; VF-TWO-CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 338; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 0 339; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = xor i32 [[TMP16]], -1 340; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] 341; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 342; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] 343; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0 344; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i32 -3 345; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -4 346; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP58]], i32 -3 347; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -8 348; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i32 -3 349; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -12 350; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP62]], i32 -3 351; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -16 352; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 -3 353; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -20 354; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 -3 355; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -24 356; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 -3 357; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr 
inbounds float, ptr [[TMP48]], i32 -28 358; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3 359; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 360; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 361; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 362; VF-TWO-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 363; VF-TWO-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP61]], align 4 364; VF-TWO-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 365; VF-TWO-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP63]], align 4 366; VF-TWO-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 367; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP65]], align 4 368; VF-TWO-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 369; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP67]], align 4 370; VF-TWO-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 371; VF-TWO-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP69]], align 4 372; VF-TWO-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 373; VF-TWO-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4 374; VF-TWO-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 375; 
VF-TWO-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) 376; VF-TWO-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00) 377; VF-TWO-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x float> [[REVERSE5]], splat (float 1.000000e+00) 378; VF-TWO-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], splat (float 1.000000e+00) 379; VF-TWO-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], splat (float 1.000000e+00) 380; VF-TWO-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], splat (float 1.000000e+00) 381; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) 382; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) 383; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] 384; VF-TWO-CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 0 385; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 386; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 8 387; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 12 388; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 16 389; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 20 390; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 24 391; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 28 392; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP72]], ptr [[TMP88]], align 4 393; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP73]], ptr [[TMP89]], align 4 394; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP74]], ptr [[TMP90]], align 4 395; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP75]], ptr [[TMP91]], align 4 396; VF-TWO-CHECK-NEXT: store <4 x float> 
[[TMP76]], ptr [[TMP92]], align 4 397; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP77]], ptr [[TMP93]], align 4 398; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP78]], ptr [[TMP94]], align 4 399; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP79]], ptr [[TMP95]], align 4 400; VF-TWO-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 401; VF-TWO-CHECK-NEXT: [[TMP96:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 402; VF-TWO-CHECK-NEXT: br i1 [[TMP96]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 403; VF-TWO-CHECK: middle.block: 404; VF-TWO-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 405; VF-TWO-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 406; VF-TWO-CHECK: vec.epilog.iter.check: 407; VF-TWO-CHECK-NEXT: [[IND_END18:%.*]] = trunc i64 [[N_VEC]] to i32 408; VF-TWO-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 409; VF-TWO-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2 410; VF-TWO-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 411; VF-TWO-CHECK: vec.epilog.ph: 412; VF-TWO-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 413; VF-TWO-CHECK-NEXT: [[N_MOD_VF16:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2 414; VF-TWO-CHECK-NEXT: [[N_VEC17:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF16]] 415; VF-TWO-CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC17]] to i32 416; VF-TWO-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 417; VF-TWO-CHECK: vec.epilog.vector.body: 418; VF-TWO-CHECK-NEXT: [[INDEX21:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 419; VF-TWO-CHECK-NEXT: [[TMP97:%.*]] = add i64 [[INDEX21]], 0 420; VF-TWO-CHECK-NEXT: [[OFFSET_IDX22:%.*]] = trunc i64 [[INDEX21]] to i32 421; VF-TWO-CHECK-NEXT: 
[[TMP98:%.*]] = add i32 [[OFFSET_IDX22]], 0 422; VF-TWO-CHECK-NEXT: [[TMP99:%.*]] = xor i32 [[TMP98]], -1 423; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]] 424; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64 425; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]] 426; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i32 0 427; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -1 428; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4 429; VF-TWO-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32> <i32 1, i32 0> 430; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00) 431; VF-TWO-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP97]] 432; VF-TWO-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, ptr [[TMP106]], i32 0 433; VF-TWO-CHECK-NEXT: store <2 x float> [[TMP105]], ptr [[TMP107]], align 4 434; VF-TWO-CHECK-NEXT: [[INDEX_NEXT25]] = add nuw i64 [[INDEX21]], 2 435; VF-TWO-CHECK-NEXT: [[TMP108:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC17]] 436; VF-TWO-CHECK-NEXT: br i1 [[TMP108]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 437; VF-TWO-CHECK: vec.epilog.middle.block: 438; VF-TWO-CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] 439; VF-TWO-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] 440; VF-TWO-CHECK: vec.epilog.scalar.ph: 441; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 442; VF-TWO-CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, 
[[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END18]], [[VEC_EPILOG_ITER_CHECK]] ] 443; VF-TWO-CHECK-NEXT: br label [[FOR_BODY:%.*]] 444; VF-TWO-CHECK: for.body: 445; VF-TWO-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 446; VF-TWO-CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL19]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 447; VF-TWO-CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[I_014]], -1 448; VF-TWO-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP109]], [[N]] 449; VF-TWO-CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64 450; VF-TWO-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IDXPROM]] 451; VF-TWO-CHECK-NEXT: [[TMP110:%.*]] = load float, ptr [[ARRAYIDX]], align 4 452; VF-TWO-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP110]], 1.000000e+00 453; VF-TWO-CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 454; VF-TWO-CHECK-NEXT: store float [[CONV3]], ptr [[ARRAYIDX5]], align 4 455; VF-TWO-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 456; VF-TWO-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 457; VF-TWO-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 458; VF-TWO-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] 459; VF-TWO-CHECK: for.end.loopexit: 460; VF-TWO-CHECK-NEXT: br label [[FOR_END]] 461; VF-TWO-CHECK: for.end: 462; VF-TWO-CHECK-NEXT: ret i32 0 463; 464; VF-FOUR-CHECK-LABEL: define dso_local signext i32 @f2( 465; VF-FOUR-CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i32 signext [[N:%.*]]) #[[ATTR0]] { 466; VF-FOUR-CHECK-NEXT: entry: 467; VF-FOUR-CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 1 468; VF-FOUR-CHECK-NEXT: br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]] 469; VF-FOUR-CHECK: iter.check: 470; VF-FOUR-CHECK-NEXT: 
[[TMP0:%.*]] = add i32 [[N]], -1 471; VF-FOUR-CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64 472; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 473; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] 474; VF-FOUR-CHECK: vector.scevcheck: 475; VF-FOUR-CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1 476; VF-FOUR-CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 477; VF-FOUR-CHECK-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]]) 478; VF-FOUR-CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 479; VF-FOUR-CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 480; VF-FOUR-CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]] 481; VF-FOUR-CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]] 482; VF-FOUR-CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]] 483; VF-FOUR-CHECK-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295 484; VF-FOUR-CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] 485; VF-FOUR-CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] 486; VF-FOUR-CHECK: vector.main.loop.iter.check: 487; VF-FOUR-CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 32 488; VF-FOUR-CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] 489; VF-FOUR-CHECK: vector.ph: 490; VF-FOUR-CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 32 491; VF-FOUR-CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 492; VF-FOUR-CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 493; VF-FOUR-CHECK: vector.body: 494; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 495; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 496; VF-FOUR-CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] 
to i32 497; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 0 498; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = xor i32 [[TMP16]], -1 499; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] 500; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 501; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] 502; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0 503; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i32 -3 504; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -4 505; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP58]], i32 -3 506; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -8 507; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i32 -3 508; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -12 509; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP62]], i32 -3 510; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -16 511; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 -3 512; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -20 513; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 -3 514; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -24 515; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 -3 516; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28 517; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3 518; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 519; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = 
shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 520; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 521; VF-FOUR-CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x float> [[WIDE_LOAD2]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 522; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP61]], align 4 523; VF-FOUR-CHECK-NEXT: [[REVERSE5:%.*]] = shufflevector <4 x float> [[WIDE_LOAD4]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 524; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP63]], align 4 525; VF-FOUR-CHECK-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x float> [[WIDE_LOAD6]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 526; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP65]], align 4 527; VF-FOUR-CHECK-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x float> [[WIDE_LOAD8]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 528; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP67]], align 4 529; VF-FOUR-CHECK-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x float> [[WIDE_LOAD10]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 530; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x float>, ptr [[TMP69]], align 4 531; VF-FOUR-CHECK-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x float> [[WIDE_LOAD12]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 532; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x float>, ptr [[TMP71]], align 4 533; VF-FOUR-CHECK-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x float> [[WIDE_LOAD14]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 534; VF-FOUR-CHECK-NEXT: [[TMP72:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00) 535; VF-FOUR-CHECK-NEXT: [[TMP73:%.*]] = fadd fast <4 x float> [[REVERSE3]], splat (float 1.000000e+00) 536; VF-FOUR-CHECK-NEXT: [[TMP74:%.*]] = fadd fast <4 x 
float> [[REVERSE5]], splat (float 1.000000e+00) 537; VF-FOUR-CHECK-NEXT: [[TMP75:%.*]] = fadd fast <4 x float> [[REVERSE7]], splat (float 1.000000e+00) 538; VF-FOUR-CHECK-NEXT: [[TMP76:%.*]] = fadd fast <4 x float> [[REVERSE9]], splat (float 1.000000e+00) 539; VF-FOUR-CHECK-NEXT: [[TMP77:%.*]] = fadd fast <4 x float> [[REVERSE11]], splat (float 1.000000e+00) 540; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) 541; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) 542; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] 543; VF-FOUR-CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 0 544; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 545; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 8 546; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 12 547; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 16 548; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 20 549; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 24 550; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 28 551; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP72]], ptr [[TMP88]], align 4 552; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP73]], ptr [[TMP89]], align 4 553; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP74]], ptr [[TMP90]], align 4 554; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP75]], ptr [[TMP91]], align 4 555; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP76]], ptr [[TMP92]], align 4 556; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP77]], ptr [[TMP93]], align 4 557; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP78]], ptr [[TMP94]], align 4 558; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP79]], 
ptr [[TMP95]], align 4 559; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 560; VF-FOUR-CHECK-NEXT: [[TMP96:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 561; VF-FOUR-CHECK-NEXT: br i1 [[TMP96]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 562; VF-FOUR-CHECK: middle.block: 563; VF-FOUR-CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 564; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] 565; VF-FOUR-CHECK: vec.epilog.iter.check: 566; VF-FOUR-CHECK-NEXT: [[IND_END18:%.*]] = trunc i64 [[N_VEC]] to i32 567; VF-FOUR-CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 568; VF-FOUR-CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 569; VF-FOUR-CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] 570; VF-FOUR-CHECK: vec.epilog.ph: 571; VF-FOUR-CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] 572; VF-FOUR-CHECK-NEXT: [[N_MOD_VF16:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 573; VF-FOUR-CHECK-NEXT: [[N_VEC17:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF16]] 574; VF-FOUR-CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC17]] to i32 575; VF-FOUR-CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] 576; VF-FOUR-CHECK: vec.epilog.vector.body: 577; VF-FOUR-CHECK-NEXT: [[INDEX21:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] 578; VF-FOUR-CHECK-NEXT: [[TMP97:%.*]] = add i64 [[INDEX21]], 0 579; VF-FOUR-CHECK-NEXT: [[OFFSET_IDX22:%.*]] = trunc i64 [[INDEX21]] to i32 580; VF-FOUR-CHECK-NEXT: [[TMP98:%.*]] = add i32 [[OFFSET_IDX22]], 0 581; VF-FOUR-CHECK-NEXT: [[TMP99:%.*]] = xor i32 [[TMP98]], -1 582; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]] 583; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = 
sext i32 [[TMP100]] to i64 584; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]] 585; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i32 0 586; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -3 587; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4 588; VF-FOUR-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 589; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00) 590; VF-FOUR-CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP97]] 591; VF-FOUR-CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds float, ptr [[TMP106]], i32 0 592; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP105]], ptr [[TMP107]], align 4 593; VF-FOUR-CHECK-NEXT: [[INDEX_NEXT25]] = add nuw i64 [[INDEX21]], 4 594; VF-FOUR-CHECK-NEXT: [[TMP108:%.*]] = icmp eq i64 [[INDEX_NEXT25]], [[N_VEC17]] 595; VF-FOUR-CHECK-NEXT: br i1 [[TMP108]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 596; VF-FOUR-CHECK: vec.epilog.middle.block: 597; VF-FOUR-CHECK-NEXT: [[CMP_N20:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC17]] 598; VF-FOUR-CHECK-NEXT: br i1 [[CMP_N20]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] 599; VF-FOUR-CHECK: vec.epilog.scalar.ph: 600; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC17]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ] 601; VF-FOUR-CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END18]], [[VEC_EPILOG_ITER_CHECK]] ] 602; VF-FOUR-CHECK-NEXT: br label [[FOR_BODY:%.*]] 603; VF-FOUR-CHECK: for.body: 604; 
VF-FOUR-CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 605; VF-FOUR-CHECK-NEXT: [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL19]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 606; VF-FOUR-CHECK-NEXT: [[TMP109:%.*]] = xor i32 [[I_014]], -1 607; VF-FOUR-CHECK-NEXT: [[SUB2:%.*]] = add i32 [[TMP109]], [[N]] 608; VF-FOUR-CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64 609; VF-FOUR-CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IDXPROM]] 610; VF-FOUR-CHECK-NEXT: [[TMP110:%.*]] = load float, ptr [[ARRAYIDX]], align 4 611; VF-FOUR-CHECK-NEXT: [[CONV3:%.*]] = fadd fast float [[TMP110]], 1.000000e+00 612; VF-FOUR-CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 613; VF-FOUR-CHECK-NEXT: store float [[CONV3]], ptr [[ARRAYIDX5]], align 4 614; VF-FOUR-CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 615; VF-FOUR-CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_014]], 1 616; VF-FOUR-CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] 617; VF-FOUR-CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] 618; VF-FOUR-CHECK: for.end.loopexit: 619; VF-FOUR-CHECK-NEXT: br label [[FOR_END]] 620; VF-FOUR-CHECK: for.end: 621; VF-FOUR-CHECK-NEXT: ret i32 0 622; 623entry: 624 %cmp1 = icmp sgt i32 %n, 1 625 br i1 %cmp1, label %for.body.preheader, label %for.end 626 627for.body.preheader: ; preds = %entry 628 %0 = add i32 %n, -1 629 %wide.trip.count = zext i32 %0 to i64 630 br label %for.body 631 632for.body: ; preds = %for.body.preheader, %for.body 633 %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] 634 %i.014 = phi i32 [ 0, %for.body.preheader ], [ %inc, %for.body ] 635 %1 = xor i32 %i.014, -1 636 %sub2 = add i32 %1, %n 637 %idxprom = sext i32 %sub2 to i64 638 %arrayidx = getelementptr 
inbounds float, ptr %B, i64 %idxprom 639 %2 = load float, ptr %arrayidx, align 4 640 %conv3 = fadd fast float %2, 1.000000e+00 641 %arrayidx5 = getelementptr inbounds float, ptr %A, i64 %indvars.iv 642 store float %conv3, ptr %arrayidx5, align 4 643 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 644 %inc = add nuw nsw i32 %i.014, 1 645 %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count 646 br i1 %exitcond, label %for.body, label %for.end.loopexit 647 648for.end.loopexit: ; preds = %for.body 649 br label %for.end 650 651for.end: ; preds = %for.end.loopexit, %entry 652 ret i32 0 653} 654 655; 656; 657attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector,-spe" "unsafe-fp-math"="true" "use-soft-float"="false" } 658