1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck --check-prefix VEC4_INTERL1 %s 3; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck --check-prefix VEC4_INTERL2 %s 4; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=2 -force-vector-width=1 -S | FileCheck --check-prefix VEC1_INTERL2 %s 5; RUN: opt < %s -passes=loop-vectorize,dce,simplifycfg,instcombine,simplifycfg -force-vector-interleave=1 -force-vector-width=2 -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s 6 7@fp_inc = common global float 0.000000e+00, align 4 8 9;void fp_iv_loop1(float init, ptr __restrict__ A, int N) { 10; float x = init; 11; for (int i=0; i < N; ++i) { 12; A[i] = x; 13; x -= fp_inc; 14; } 15;} 16 17 18 19 20define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) { 21; VEC4_INTERL1-LABEL: @fp_iv_loop1_fast_FMF( 22; VEC4_INTERL1-NEXT: entry: 23; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 24; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 25; VEC4_INTERL1: for.body.lr.ph: 26; VEC4_INTERL1-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 27; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 28; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 29; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 30; VEC4_INTERL1: vector.ph: 31; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 32; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 33; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] 34; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] 35; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 36; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 37; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 38; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 39; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 40; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP2]] 41; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 4.000000e+00 42; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 43; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 44; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 45; VEC4_INTERL1: vector.body: 46; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 47; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 48; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 49; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4 50; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 51; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 52; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 53; VEC4_INTERL1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] 54; VEC4_INTERL1: middle.block: 55; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 56; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 57; VEC4_INTERL1: scalar.ph: 58; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 59; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 60; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 61; VEC4_INTERL1: for.body: 62; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 63; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 64; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 65; VEC4_INTERL1-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 66; VEC4_INTERL1-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 67; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 68; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 69; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 70; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 71; VEC4_INTERL1: for.end.loopexit: 72; VEC4_INTERL1-NEXT: br label [[FOR_END]] 73; VEC4_INTERL1: for.end: 74; VEC4_INTERL1-NEXT: ret void 75; 76; VEC4_INTERL2-LABEL: @fp_iv_loop1_fast_FMF( 77; VEC4_INTERL2-NEXT: entry: 78; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 79; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 80; VEC4_INTERL2: for.body.lr.ph: 81; VEC4_INTERL2-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 82; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 83; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 84; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 85; VEC4_INTERL2: vector.ph: 86; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 87; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 88; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] 89; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] 90; VEC4_INTERL2-NEXT: [[FPINC_INS:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 91; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast <4 x float> [[FPINC_INS]], <float 4.000000e+00, float poison, float poison, float poison> 92; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <4 x i32> zeroinitializer 93; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 94; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 95; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 96; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 97; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 98; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP2]] 99; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 100; VEC4_INTERL2: vector.body: 101; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 102; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 103; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 104; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 105; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16 106; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4 107; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP5]], align 4 108; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 109; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]] 110; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 111; VEC4_INTERL2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] 112; VEC4_INTERL2: middle.block: 113; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 114; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 115; VEC4_INTERL2: scalar.ph: 116; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 117; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 118; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 119; VEC4_INTERL2: for.body: 120; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 121; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 122; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 123; VEC4_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 124; VEC4_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 125; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 126; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 127; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 128; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 129; VEC4_INTERL2: for.end.loopexit: 130; VEC4_INTERL2-NEXT: br label [[FOR_END]] 131; VEC4_INTERL2: for.end: 132; VEC4_INTERL2-NEXT: ret void 133; 134; VEC1_INTERL2-LABEL: @fp_iv_loop1_fast_FMF( 135; VEC1_INTERL2-NEXT: entry: 136; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 137; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 138; VEC1_INTERL2: for.body.lr.ph: 139; VEC1_INTERL2-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 140; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 141; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 142; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 143; VEC1_INTERL2: vector.ph: 144; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 145; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 146; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] 147; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] 148; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 149; VEC1_INTERL2: vector.body: 150; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 151; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1 152; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float 153; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[DOTCAST2]] 154; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP3]] 155; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]] 156; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 157; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] 158; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP5]], align 4 159; VEC1_INTERL2-NEXT: store float [[TMP4]], ptr [[TMP6]], align 4 160; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 161; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 162; VEC1_INTERL2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] 163; VEC1_INTERL2: middle.block: 164; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 165; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 166; VEC1_INTERL2: scalar.ph: 167; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 168; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 169; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 170; VEC1_INTERL2: for.body: 171; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 172; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 173; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 174; VEC1_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 175; VEC1_INTERL2-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 176; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 177; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 178; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 179; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 180; VEC1_INTERL2: for.end.loopexit: 181; VEC1_INTERL2-NEXT: br label [[FOR_END]] 182; VEC1_INTERL2: for.end: 183; VEC1_INTERL2-NEXT: ret void 184; 185; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_fast_FMF( 186; VEC2_INTERL1_PRED_STORE-NEXT: entry: 187; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 188; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 189; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 190; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 191; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 192; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 193; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 194; VEC2_INTERL1_PRED_STORE: vector.ph: 195; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 196; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 197; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] 198; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] 199; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 200; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 201; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 202; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer 203; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00> 204; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP2]] 205; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00 206; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 207; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer 208; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 209; VEC2_INTERL1_PRED_STORE: vector.body: 210; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 211; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 212; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 213; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP4]], align 4 214; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 215; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]] 216; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 217; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] 218; VEC2_INTERL1_PRED_STORE: middle.block: 219; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 220; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 221; VEC2_INTERL1_PRED_STORE: for.body: 222; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 223; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 224; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 225; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 226; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub fast float [[X_05]], [[FPINC]] 227; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 228; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 229; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 230; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 231; VEC2_INTERL1_PRED_STORE: for.end: 232; VEC2_INTERL1_PRED_STORE-NEXT: ret void 233; 234entry: 235 %cmp4 = icmp sgt i32 %N, 0 236 br i1 %cmp4, label %for.body.lr.ph, label %for.end 237 238for.body.lr.ph: ; preds = %entry 239 %fpinc = load float, ptr @fp_inc, align 4 240 br label %for.body 241 242for.body: ; preds = %for.body, %for.body.lr.ph 243 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 244 %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 245 %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv 246 store float %x.05, ptr %arrayidx, align 4 247 %add = fsub fast float %x.05, %fpinc 248 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 249 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 250 %exitcond = icmp eq i32 %lftr.wideiv, %N 251 br i1 %exitcond, label %for.end.loopexit, label %for.body 252 253for.end.loopexit: ; preds = %for.body 254 br label %for.end 255 256for.end: ; preds = %for.end.loopexit, %entry 257 ret void 258} 259 260; We do not need the full 'fast' FMF to vectorize the loop, but the code can't become 261; 'fast' spontaneously - FMF should propagate from the original IR. 262 263define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 %N) { 264; 265; 266; 267; 268; VEC4_INTERL1-LABEL: @fp_iv_loop1_reassoc_FMF( 269; VEC4_INTERL1-NEXT: entry: 270; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 271; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 272; VEC4_INTERL1: for.body.lr.ph: 273; VEC4_INTERL1-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 274; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 275; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 276; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 277; VEC4_INTERL1: vector.ph: 278; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 279; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 280; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] 281; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] 282; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 283; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 284; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 285; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 286; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 287; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP2]] 288; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00 289; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 290; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer 291; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 292; VEC4_INTERL1: vector.body: 293; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 294; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 295; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 296; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4 297; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 298; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 299; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 300; VEC4_INTERL1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 301; VEC4_INTERL1: middle.block: 302; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 303; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 304; VEC4_INTERL1: scalar.ph: 305; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 306; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 307; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 308; VEC4_INTERL1: for.body: 309; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 310; VEC4_INTERL1-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 311; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 312; VEC4_INTERL1-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 313; VEC4_INTERL1-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 314; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 315; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 316; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 317; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 318; VEC4_INTERL1: for.end.loopexit: 319; VEC4_INTERL1-NEXT: br label [[FOR_END]] 320; VEC4_INTERL1: for.end: 321; VEC4_INTERL1-NEXT: ret void 322; 323; VEC4_INTERL2-LABEL: @fp_iv_loop1_reassoc_FMF( 324; VEC4_INTERL2-NEXT: entry: 325; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 326; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 327; VEC4_INTERL2: for.body.lr.ph: 328; VEC4_INTERL2-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 329; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 330; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 331; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 332; VEC4_INTERL2: vector.ph: 333; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 334; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 335; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] 336; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] 337; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 338; VEC4_INTERL2-NEXT: [[MUL:%.*]] = fmul reassoc <4 x float> [[DOTSPLATINSERT2]], <float 4.000000e+00, float poison, float poison, float poison> 339; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[MUL]], <4 x float> poison, <4 x i32> zeroinitializer 340; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 341; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 342; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0 343; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT1]], <4 x float> poison, <4 x i32> zeroinitializer 344; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 345; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP2]] 346; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 347; VEC4_INTERL2: vector.body: 348; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 349; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 350; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]] 351; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 352; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16 353; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP4]], align 4 354; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP5]], align 4 355; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 356; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]] 357; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 358; VEC4_INTERL2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 359; VEC4_INTERL2: middle.block: 360; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 361; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 362; VEC4_INTERL2: scalar.ph: 363; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 364; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 365; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 366; VEC4_INTERL2: for.body: 367; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 368; VEC4_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 369; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 370; VEC4_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 371; VEC4_INTERL2-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 372; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 373; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 374; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 375; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 376; VEC4_INTERL2: for.end.loopexit: 377; VEC4_INTERL2-NEXT: br label [[FOR_END]] 378; VEC4_INTERL2: for.end: 379; VEC4_INTERL2-NEXT: ret void 380; 381; VEC1_INTERL2-LABEL: @fp_iv_loop1_reassoc_FMF( 382; VEC1_INTERL2-NEXT: entry: 383; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 384; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 385; VEC1_INTERL2: for.body.lr.ph: 386; VEC1_INTERL2-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 387; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 388; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 389; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 390; VEC1_INTERL2: vector.ph: 391; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 392; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 393; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] 394; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] 395; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 396; VEC1_INTERL2: vector.body: 397; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 398; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1 399; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float 400; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST2]] 401; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fsub reassoc float [[INIT]], [[TMP3]] 402; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fmul reassoc float [[FPINC]], 0.000000e+00 403; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[TMP4]] 404; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]] 405; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 406; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] 407; VEC1_INTERL2-NEXT: store float [[TMP5]], ptr [[TMP7]], align 4 408; VEC1_INTERL2-NEXT: store float [[TMP6]], ptr [[TMP8]], align 4 409; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 410; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 411; VEC1_INTERL2-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 412; VEC1_INTERL2: middle.block: 413; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 414; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 415; VEC1_INTERL2: scalar.ph: 416; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 417; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 418; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 419; VEC1_INTERL2: for.body: 420; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 421; VEC1_INTERL2-NEXT: [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 422; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 423; VEC1_INTERL2-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 424; VEC1_INTERL2-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 425; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 426; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 427; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 428; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 429; VEC1_INTERL2: for.end.loopexit: 430; VEC1_INTERL2-NEXT: br label [[FOR_END]] 431; VEC1_INTERL2: for.end: 432; VEC1_INTERL2-NEXT: ret void 433; 434; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_reassoc_FMF( 435; VEC2_INTERL1_PRED_STORE-NEXT: entry: 436; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 437; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 438; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 439; VEC2_INTERL1_PRED_STORE-NEXT: [[FPINC:%.*]] = load float, ptr @fp_inc, align 4 440; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 441; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 442; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 443; VEC2_INTERL1_PRED_STORE: vector.ph: 444; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 445; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 446; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] 447; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] 448; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 449; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 450; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0 451; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer 452; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00> 453; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP2]] 454; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00 455; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 456; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer 457; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 458; VEC2_INTERL1_PRED_STORE: vector.body: 459; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 460; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 461; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 462; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP4]], align 4 463; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 464; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]] 465; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 466; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 467; VEC2_INTERL1_PRED_STORE: middle.block: 468; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 469; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 470; VEC2_INTERL1_PRED_STORE: for.body: 471; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 472; VEC2_INTERL1_PRED_STORE-NEXT: [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 473; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 474; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_05]], ptr [[ARRAYIDX]], align 4 475; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]] 476; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 477; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 478; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 479; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 480; VEC2_INTERL1_PRED_STORE: for.end: 481; VEC2_INTERL1_PRED_STORE-NEXT: ret void 482; 483entry: 484 %cmp4 = icmp sgt i32 %N, 0 485 br i1 %cmp4, label %for.body.lr.ph, label %for.end 486 487for.body.lr.ph: ; preds = %entry 488 %fpinc = load float, ptr @fp_inc, align 4 489 br label %for.body 490 491for.body: ; preds = %for.body, %for.body.lr.ph 492 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 493 %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 494 %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv 495 store float %x.05, ptr %arrayidx, align 4 496 %add = fsub reassoc float %x.05, %fpinc 497 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 498 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 499 %exitcond = icmp eq i32 %lftr.wideiv, %N 500 br i1 %exitcond, label %for.end.loopexit, label %for.body 501 502for.end.loopexit: ; preds = %for.body 503 br label %for.end 504 505for.end: ; preds = %for.end.loopexit, %entry 506 ret void 507} 508 509;void fp_iv_loop2(float init, ptr __restrict__ A, int N) { 510; float x = init; 511; for (int i=0; i < N; ++i) { 512; A[i] = x; 513; x += 0.5; 514; } 515;} 516 517 518define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { 519; VEC4_INTERL1-LABEL: @fp_iv_loop2( 520; VEC4_INTERL1-NEXT: entry: 521; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 522; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 523; VEC4_INTERL1: for.body.preheader: 524; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 525; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 526; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 527; VEC4_INTERL1: vector.ph: 528; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 529; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 530; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 531; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[INIT:%.*]], [[TMP1]] 532; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 533; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 534; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00> 535; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 536; VEC4_INTERL1: vector.body: 537; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 538; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 539; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 540; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 541; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 542; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) 543; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 544; VEC4_INTERL1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 545; VEC4_INTERL1: middle.block: 546; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 547; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 548; VEC4_INTERL1: scalar.ph: 549; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 550; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 551; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 552; VEC4_INTERL1: for.body: 553; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 554; VEC4_INTERL1-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 555; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 556; VEC4_INTERL1-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 557; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 558; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 559; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 560; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 561; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 562; VEC4_INTERL1: for.end.loopexit: 563; VEC4_INTERL1-NEXT: br label [[FOR_END]] 564; VEC4_INTERL1: for.end: 565; VEC4_INTERL1-NEXT: ret void 566; 567; VEC4_INTERL2-LABEL: @fp_iv_loop2( 568; VEC4_INTERL2-NEXT: entry: 569; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 570; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 571; VEC4_INTERL2: for.body.preheader: 572; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 573; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 574; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 575; VEC4_INTERL2: vector.ph: 576; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 577; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 578; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 579; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[INIT:%.*]], [[TMP1]] 580; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 581; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 582; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00> 583; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 584; VEC4_INTERL2: vector.body: 585; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 586; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 587; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) 588; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 589; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16 590; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 591; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4 592; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 593; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00) 594; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 595; VEC4_INTERL2-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 596; VEC4_INTERL2: middle.block: 597; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 598; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 599; VEC4_INTERL2: scalar.ph: 600; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 601; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 602; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 603; VEC4_INTERL2: for.body: 604; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 605; VEC4_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 606; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 607; VEC4_INTERL2-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 608; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 609; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 610; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 611; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 612; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 613; VEC4_INTERL2: for.end.loopexit: 614; VEC4_INTERL2-NEXT: br label [[FOR_END]] 615; VEC4_INTERL2: for.end: 616; VEC4_INTERL2-NEXT: ret void 617; 618; VEC1_INTERL2-LABEL: @fp_iv_loop2( 619; VEC1_INTERL2-NEXT: entry: 620; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 621; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 622; VEC1_INTERL2: for.body.preheader: 623; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 624; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 625; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 626; VEC1_INTERL2: vector.ph: 627; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 628; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 629; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 630; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[INIT:%.*]], [[TMP1]] 631; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 632; VEC1_INTERL2: vector.body: 633; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 634; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1 635; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float 636; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[DOTCAST2]], 5.000000e-01 637; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP3]] 638; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01 639; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 640; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] 641; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP5]], align 4 642; VEC1_INTERL2-NEXT: store float [[TMP4]], ptr [[TMP6]], align 4 643; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 644; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 645; VEC1_INTERL2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 646; VEC1_INTERL2: middle.block: 647; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 648; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 649; VEC1_INTERL2: scalar.ph: 650; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 651; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 652; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 653; VEC1_INTERL2: for.body: 654; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 655; VEC1_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 656; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 657; VEC1_INTERL2-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 658; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 659; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 660; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 661; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 662; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 663; VEC1_INTERL2: for.end.loopexit: 664; VEC1_INTERL2-NEXT: br label [[FOR_END]] 665; VEC1_INTERL2: for.end: 666; VEC1_INTERL2-NEXT: ret void 667; 668; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop2( 669; VEC2_INTERL1_PRED_STORE-NEXT: entry: 670; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 671; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 672; VEC2_INTERL1_PRED_STORE: for.body.preheader: 673; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 674; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 675; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 676; VEC2_INTERL1_PRED_STORE: vector.ph: 677; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 678; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 679; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 680; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[INIT:%.*]], [[TMP1]] 681; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 682; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 683; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01> 684; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 685; VEC2_INTERL1_PRED_STORE: vector.body: 686; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 687; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 688; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 689; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 690; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 691; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) 692; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 693; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 694; VEC2_INTERL1_PRED_STORE: middle.block: 695; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 696; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 697; VEC2_INTERL1_PRED_STORE: for.body: 698; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 699; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ] 700; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 701; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 702; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 703; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 704; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 705; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 706; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 707; VEC2_INTERL1_PRED_STORE: for.end: 708; VEC2_INTERL1_PRED_STORE-NEXT: ret void 709; 710entry: 711 %cmp4 = icmp sgt i32 %N, 0 712 br i1 %cmp4, label %for.body.preheader, label %for.end 713 714for.body.preheader: ; preds = %entry 715 br label %for.body 716 717for.body: ; preds = %for.body.preheader, %for.body 718 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] 719 %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ] 720 %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv 721 store float %x.06, ptr %arrayidx, align 4 722 %conv1 = fadd fast float %x.06, 5.000000e-01 723 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 724 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 725 %exitcond = icmp eq i32 %lftr.wideiv, %N 726 br i1 %exitcond, label %for.end.loopexit, label %for.body 727 728for.end.loopexit: ; preds = %for.body 729 br label %for.end 730 731for.end: ; preds = %for.end.loopexit, %entry 732 ret void 733} 734 735;void fp_iv_loop3(float init, ptr __restrict__ A, ptr __restrict__ B, ptr __restrict__ C, int N) { 736; int i = 0; 737; float x = init; 738; float y = 0.1; 739; for (; i < N; ++i) { 740; A[i] = x; 741; x += fp_inc; 742; y -= 0.5; 743; B[i] = x + y; 744; C[i] = y; 745; } 746;} 747 748 749define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias nocapture %B, ptr noalias nocapture %C, i32 %N) { 750; VEC4_INTERL1-LABEL: @fp_iv_loop3( 751; VEC4_INTERL1-NEXT: entry: 752; VEC4_INTERL1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 753; VEC4_INTERL1-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 754; VEC4_INTERL1: for.body.lr.ph: 755; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4 756; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 757; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 758; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 759; VEC4_INTERL1: vector.ph: 760; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483644 761; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 762; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 763; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 764; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float 765; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] 766; VEC4_INTERL1-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] 767; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 768; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 769; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 770; VEC4_INTERL1-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer 771; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 772; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]] 773; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], 4.000000e+00 774; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 775; VEC4_INTERL1-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer 776; VEC4_INTERL1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 777; VEC4_INTERL1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 778; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 779; VEC4_INTERL1: vector.body: 780; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 781; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 782; VEC4_INTERL1-NEXT: [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] 783; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 784; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 785; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] 786; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01) 787; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[TMP8]], [[TMP7]] 788; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] 789; VEC4_INTERL1-NEXT: store <4 x float> [[TMP9]], ptr [[TMP10]], align 4 790; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] 791; VEC4_INTERL1-NEXT: store <4 x float> [[TMP8]], ptr [[TMP11]], align 4 792; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 793; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.000000e+00) 794; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]] 795; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 796; VEC4_INTERL1-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 797; VEC4_INTERL1: middle.block: 798; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]] 799; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 800; VEC4_INTERL1: scalar.ph: 801; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 802; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 803; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL4:%.*]] = phi float [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 804; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 805; VEC4_INTERL1: for.body: 806; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 807; VEC4_INTERL1-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] 808; VEC4_INTERL1-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 809; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 810; VEC4_INTERL1-NEXT: store float [[X_011]], ptr [[ARRAYIDX]], align 4 811; VEC4_INTERL1-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 812; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 813; VEC4_INTERL1-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 814; VEC4_INTERL1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]] 815; VEC4_INTERL1-NEXT: store float [[ADD2]], ptr [[ARRAYIDX4]], align 4 816; VEC4_INTERL1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[INDVARS_IV]] 817; VEC4_INTERL1-NEXT: store float [[CONV1]], ptr [[ARRAYIDX6]], align 4 818; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 819; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 820; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 821; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 822; VEC4_INTERL1: for.end.loopexit: 823; VEC4_INTERL1-NEXT: br label [[FOR_END]] 824; VEC4_INTERL1: for.end: 825; VEC4_INTERL1-NEXT: ret void 826; 827; VEC4_INTERL2-LABEL: @fp_iv_loop3( 828; VEC4_INTERL2-NEXT: entry: 829; VEC4_INTERL2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 830; VEC4_INTERL2-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 831; VEC4_INTERL2: for.body.lr.ph: 832; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4 833; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 834; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 835; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 836; VEC4_INTERL2: vector.ph: 837; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483640 838; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 839; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 840; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 841; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float 842; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] 843; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] 844; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 845; VEC4_INTERL2-NEXT: [[BROADCAST:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer 846; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = fmul fast <4 x float> [[BROADCAST]], splat (float 4.000000e+00) 847; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 848; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer 849; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0 850; VEC4_INTERL2-NEXT: [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer 851; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT7]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00> 852; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP4]] 853; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 854; VEC4_INTERL2: vector.body: 855; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 856; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 857; VEC4_INTERL2-NEXT: [[VEC_IND10:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ] 858; VEC4_INTERL2-NEXT: [[STEP_ADD11:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[DOTSPLAT5]] 859; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 860; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 16 861; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND10]], ptr [[TMP6]], align 4 862; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD11]], ptr [[TMP7]], align 4 863; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST]] 864; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST]] 865; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -5.000000e-01) 866; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float -2.500000e+00) 867; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[TMP10]], [[TMP8]] 868; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP9]] 869; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] 870; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP14]], i64 16 871; VEC4_INTERL2-NEXT: store <4 x float> [[TMP12]], ptr [[TMP14]], align 4 872; VEC4_INTERL2-NEXT: store <4 x float> [[TMP13]], ptr [[TMP15]], align 4 873; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] 874; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP16]], i64 16 875; VEC4_INTERL2-NEXT: store <4 x float> [[TMP10]], ptr [[TMP16]], align 4 876; VEC4_INTERL2-NEXT: store <4 x float> [[TMP11]], ptr [[TMP17]], align 4 877; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 878; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float -4.000000e+00) 879; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT13]] = fadd fast <4 x float> [[STEP_ADD11]], [[DOTSPLAT5]] 880; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 881; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 882; VEC4_INTERL2: middle.block: 883; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]] 884; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 885; VEC4_INTERL2: scalar.ph: 886; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 887; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 888; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL4:%.*]] = phi float [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 889; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 890; VEC4_INTERL2: for.body: 891; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 892; VEC4_INTERL2-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] 893; VEC4_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 894; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 895; VEC4_INTERL2-NEXT: store float [[X_011]], ptr [[ARRAYIDX]], align 4 896; VEC4_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 897; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 898; VEC4_INTERL2-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 899; VEC4_INTERL2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]] 900; VEC4_INTERL2-NEXT: store float [[ADD2]], ptr [[ARRAYIDX4]], align 4 901; VEC4_INTERL2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[INDVARS_IV]] 902; VEC4_INTERL2-NEXT: store float [[CONV1]], ptr [[ARRAYIDX6]], align 4 903; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 904; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 905; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 906; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 907; VEC4_INTERL2: for.end.loopexit: 908; VEC4_INTERL2-NEXT: br label [[FOR_END]] 909; VEC4_INTERL2: for.end: 910; VEC4_INTERL2-NEXT: ret void 911; 912; VEC1_INTERL2-LABEL: @fp_iv_loop3( 913; VEC1_INTERL2-NEXT: entry: 914; VEC1_INTERL2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 915; VEC1_INTERL2-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 916; VEC1_INTERL2: for.body.lr.ph: 917; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4 918; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 919; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 920; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 921; VEC1_INTERL2: vector.ph: 922; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483646 923; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 924; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 925; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 926; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float 927; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] 928; VEC1_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] 929; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 930; VEC1_INTERL2: vector.body: 931; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 932; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = or disjoint i64 [[INDEX]], 1 933; VEC1_INTERL2-NEXT: [[DOTCAST5:%.*]] = sitofp i64 [[INDEX]] to float 934; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[DOTCAST5]], -5.000000e-01 935; VEC1_INTERL2-NEXT: [[DOTCAST6:%.*]] = sitofp i64 [[INDEX]] to float 936; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fmul fast float [[TMP0]], [[DOTCAST6]] 937; VEC1_INTERL2-NEXT: [[OFFSET_IDX7:%.*]] = fadd fast float [[INIT]], [[TMP6]] 938; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fadd fast float [[OFFSET_IDX7]], [[TMP0]] 939; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 940; VEC1_INTERL2-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]] 941; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX7]], ptr [[TMP8]], align 4 942; VEC1_INTERL2-NEXT: store float [[TMP7]], ptr [[TMP9]], align 4 943; VEC1_INTERL2-NEXT: [[TMP10:%.*]] = fadd fast float [[OFFSET_IDX7]], [[TMP0]] 944; VEC1_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP7]], [[TMP0]] 945; VEC1_INTERL2-NEXT: [[TMP12:%.*]] = fadd fast float [[TMP5]], 0xBFD99999A0000000 946; VEC1_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast float [[TMP5]], 0xBFECCCCCC0000000 947; VEC1_INTERL2-NEXT: [[TMP14:%.*]] = fadd fast float [[TMP12]], [[TMP10]] 948; VEC1_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP13]], [[TMP11]] 949; VEC1_INTERL2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] 950; VEC1_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] 951; VEC1_INTERL2-NEXT: store float [[TMP14]], ptr [[TMP16]], align 4 952; VEC1_INTERL2-NEXT: store float [[TMP15]], ptr [[TMP17]], align 4 953; VEC1_INTERL2-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] 954; VEC1_INTERL2-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP4]] 955; VEC1_INTERL2-NEXT: store float [[TMP12]], ptr [[TMP18]], align 4 956; VEC1_INTERL2-NEXT: store float [[TMP13]], ptr [[TMP19]], align 4 957; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 958; VEC1_INTERL2-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 959; VEC1_INTERL2-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 960; VEC1_INTERL2: middle.block: 961; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]] 962; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 963; VEC1_INTERL2: scalar.ph: 964; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 965; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 966; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL4:%.*]] = phi float [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 967; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 968; VEC1_INTERL2: for.body: 969; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 970; VEC1_INTERL2-NEXT: [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ] 971; VEC1_INTERL2-NEXT: [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 972; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 973; VEC1_INTERL2-NEXT: store float [[X_011]], ptr [[ARRAYIDX]], align 4 974; VEC1_INTERL2-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 975; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 976; VEC1_INTERL2-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 977; VEC1_INTERL2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]] 978; VEC1_INTERL2-NEXT: store float [[ADD2]], ptr [[ARRAYIDX4]], align 4 979; VEC1_INTERL2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds nuw float, ptr [[C]], i64 [[INDVARS_IV]] 980; VEC1_INTERL2-NEXT: store float [[CONV1]], ptr [[ARRAYIDX6]], align 4 981; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 982; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 983; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 984; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 985; VEC1_INTERL2: for.end.loopexit: 986; VEC1_INTERL2-NEXT: br label [[FOR_END]] 987; VEC1_INTERL2: for.end: 988; VEC1_INTERL2-NEXT: ret void 989; 990; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop3( 991; VEC2_INTERL1_PRED_STORE-NEXT: entry: 992; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 993; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] 994; VEC2_INTERL1_PRED_STORE: for.body.lr.ph: 995; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4 996; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 997; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 998; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 999; VEC2_INTERL1_PRED_STORE: vector.ph: 1000; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483646 1001; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1002; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 1003; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 1004; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1005; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] 1006; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END3:%.*]] = fadd fast float [[INIT:%.*]], [[TMP3]] 1007; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 1008; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 1009; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 1010; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer 1011; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00> 1012; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP4]] 1013; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0]], 2.000000e+00 1014; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 1015; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer 1016; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0 1017; VEC2_INTERL1_PRED_STORE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer 1018; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 1019; VEC2_INTERL1_PRED_STORE: vector.body: 1020; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1021; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1022; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ] 1023; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1024; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND9]], ptr [[TMP6]], align 4 1025; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]] 1026; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = fadd fast <2 x float> [[VEC_IND]], splat (float -5.000000e-01) 1027; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP9:%.*]] = fadd fast <2 x float> [[TMP8]], [[TMP7]] 1028; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] 1029; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP10]], align 4 1030; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] 1031; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[TMP8]], ptr [[TMP11]], align 4 1032; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1033; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float -1.000000e+00) 1034; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]] 1035; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1036; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] 1037; VEC2_INTERL1_PRED_STORE: middle.block: 1038; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP1]] 1039; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 1040; VEC2_INTERL1_PRED_STORE: for.body: 1041; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ] 1042; VEC2_INTERL1_PRED_STORE-NEXT: [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ] 1043; VEC2_INTERL1_PRED_STORE-NEXT: [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ] 1044; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 1045; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_011]], ptr [[ARRAYIDX]], align 4 1046; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD]] = fadd fast float [[X_011]], [[TMP0]] 1047; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01 1048; VEC2_INTERL1_PRED_STORE-NEXT: [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]] 1049; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] 1050; VEC2_INTERL1_PRED_STORE-NEXT: store float [[ADD2]], ptr [[ARRAYIDX4]], align 4 1051; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] 1052; VEC2_INTERL1_PRED_STORE-NEXT: store float [[CONV1]], ptr [[ARRAYIDX6]], align 4 1053; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1054; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1055; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 1056; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 1057; VEC2_INTERL1_PRED_STORE: for.end: 1058; VEC2_INTERL1_PRED_STORE-NEXT: ret void 1059; 1060entry: 1061 %cmp9 = icmp sgt i32 %N, 0 1062 br i1 %cmp9, label %for.body.lr.ph, label %for.end 1063 1064for.body.lr.ph: ; preds = %entry 1065 %0 = load float, ptr @fp_inc, align 4 1066 br label %for.body 1067 1068for.body: ; preds = %for.body, %for.body.lr.ph 1069 %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] 1070 %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ] 1071 %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ] 1072 %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv 1073 store float %x.011, ptr %arrayidx, align 4 1074 %add = fadd fast float %x.011, %0 1075 %conv1 = fadd fast float %y.012, -5.000000e-01 1076 %add2 = fadd fast float %conv1, %add 1077 %arrayidx4 = getelementptr inbounds float, ptr %B, i64 %indvars.iv 1078 store float %add2, ptr %arrayidx4, align 4 1079 %arrayidx6 = getelementptr inbounds float, ptr %C, i64 %indvars.iv 1080 store float %conv1, ptr %arrayidx6, align 4 1081 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1082 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1083 %exitcond = icmp eq i32 %lftr.wideiv, %N 1084 br i1 %exitcond, label %for.end.loopexit, label %for.body 1085 1086for.end.loopexit: 1087 br label %for.end 1088 1089for.end: 1090 ret void 1091} 1092 1093; Start and step values are constants. There is no 'fmul' operation in this case 1094;void fp_iv_loop4(ptr __restrict__ A, int N) { 1095; float x = 1.0; 1096; for (int i=0; i < N; ++i) { 1097; A[i] = x; 1098; x += 0.5; 1099; } 1100;} 1101 1102 1103define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { 1104; VEC4_INTERL1-LABEL: @fp_iv_loop4( 1105; VEC4_INTERL1-NEXT: entry: 1106; VEC4_INTERL1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1107; VEC4_INTERL1-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1108; VEC4_INTERL1: for.body.preheader: 1109; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 1110; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 1111; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1112; VEC4_INTERL1: vector.ph: 1113; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 1114; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1115; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 1116; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 1117; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 1118; VEC4_INTERL1: vector.body: 1119; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1120; VEC4_INTERL1-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1121; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1122; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 1123; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1124; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) 1125; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1126; VEC4_INTERL1-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1127; VEC4_INTERL1: middle.block: 1128; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 1129; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1130; VEC4_INTERL1: scalar.ph: 1131; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1132; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1133; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 1134; VEC4_INTERL1: for.body: 1135; VEC4_INTERL1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1136; VEC4_INTERL1-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1137; VEC4_INTERL1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 1138; VEC4_INTERL1-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 1139; VEC4_INTERL1-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1140; VEC4_INTERL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1141; VEC4_INTERL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1142; VEC4_INTERL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 1143; VEC4_INTERL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 1144; VEC4_INTERL1: for.end.loopexit: 1145; VEC4_INTERL1-NEXT: br label [[FOR_END]] 1146; VEC4_INTERL1: for.end: 1147; VEC4_INTERL1-NEXT: ret void 1148; 1149; VEC4_INTERL2-LABEL: @fp_iv_loop4( 1150; VEC4_INTERL2-NEXT: entry: 1151; VEC4_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1152; VEC4_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1153; VEC4_INTERL2: for.body.preheader: 1154; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 1155; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 1156; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1157; VEC4_INTERL2: vector.ph: 1158; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 1159; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1160; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 1161; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 1162; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1163; VEC4_INTERL2: vector.body: 1164; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1165; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1166; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], splat (float 2.000000e+00) 1167; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1168; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 16 1169; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], ptr [[TMP2]], align 4 1170; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], ptr [[TMP3]], align 4 1171; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1172; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], splat (float 4.000000e+00) 1173; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1174; VEC4_INTERL2-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1175; VEC4_INTERL2: middle.block: 1176; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 1177; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1178; VEC4_INTERL2: scalar.ph: 1179; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1180; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1181; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1182; VEC4_INTERL2: for.body: 1183; VEC4_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1184; VEC4_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1185; VEC4_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 1186; VEC4_INTERL2-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 1187; VEC4_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1188; VEC4_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1189; VEC4_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1190; VEC4_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 1191; VEC4_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 1192; VEC4_INTERL2: for.end.loopexit: 1193; VEC4_INTERL2-NEXT: br label [[FOR_END]] 1194; VEC4_INTERL2: for.end: 1195; VEC4_INTERL2-NEXT: ret void 1196; 1197; VEC1_INTERL2-LABEL: @fp_iv_loop4( 1198; VEC1_INTERL2-NEXT: entry: 1199; VEC1_INTERL2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1200; VEC1_INTERL2-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1201; VEC1_INTERL2: for.body.preheader: 1202; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 1203; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 1204; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1205; VEC1_INTERL2: vector.ph: 1206; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 1207; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1208; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 1209; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 1210; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1211; VEC1_INTERL2: vector.body: 1212; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1213; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1 1214; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float 1215; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[DOTCAST2]], 5.000000e-01 1216; VEC1_INTERL2-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[TMP3]], 1.000000e+00 1217; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = fadd fast float [[TMP3]], 1.500000e+00 1218; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1219; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]] 1220; VEC1_INTERL2-NEXT: store float [[OFFSET_IDX]], ptr [[TMP5]], align 4 1221; VEC1_INTERL2-NEXT: store float [[TMP4]], ptr [[TMP6]], align 4 1222; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1223; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1224; VEC1_INTERL2-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1225; VEC1_INTERL2: middle.block: 1226; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 1227; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 1228; VEC1_INTERL2: scalar.ph: 1229; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1230; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1231; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1232; VEC1_INTERL2: for.body: 1233; VEC1_INTERL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1234; VEC1_INTERL2-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1235; VEC1_INTERL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] 1236; VEC1_INTERL2-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 1237; VEC1_INTERL2-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1238; VEC1_INTERL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1239; VEC1_INTERL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1240; VEC1_INTERL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 1241; VEC1_INTERL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 1242; VEC1_INTERL2: for.end.loopexit: 1243; VEC1_INTERL2-NEXT: br label [[FOR_END]] 1244; VEC1_INTERL2: for.end: 1245; VEC1_INTERL2-NEXT: ret void 1246; 1247; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop4( 1248; VEC2_INTERL1_PRED_STORE-NEXT: entry: 1249; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 1250; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 1251; VEC2_INTERL1_PRED_STORE: for.body.preheader: 1252; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 1253; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[N]], 1 1254; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 1255; VEC2_INTERL1_PRED_STORE: vector.ph: 1256; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 1257; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1258; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 1259; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 1260; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 1261; VEC2_INTERL1_PRED_STORE: vector.body: 1262; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1263; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ <float 1.000000e+00, float 1.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1264; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1265; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP2]], align 4 1266; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1267; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], splat (float 1.000000e+00) 1268; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1269; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] 1270; VEC2_INTERL1_PRED_STORE: middle.block: 1271; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 1272; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]] 1273; VEC2_INTERL1_PRED_STORE: for.body: 1274; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 1275; VEC2_INTERL1_PRED_STORE-NEXT: [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ] 1276; VEC2_INTERL1_PRED_STORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 1277; VEC2_INTERL1_PRED_STORE-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 1278; VEC2_INTERL1_PRED_STORE-NEXT: [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01 1279; VEC2_INTERL1_PRED_STORE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1280; VEC2_INTERL1_PRED_STORE-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1281; VEC2_INTERL1_PRED_STORE-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 1282; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 1283; VEC2_INTERL1_PRED_STORE: for.end: 1284; VEC2_INTERL1_PRED_STORE-NEXT: ret void 1285; 1286entry: 1287 %cmp4 = icmp sgt i32 %N, 0 1288 br i1 %cmp4, label %for.body.preheader, label %for.end 1289 1290for.body.preheader: ; preds = %entry 1291 br label %for.body 1292 1293for.body: ; preds = %for.body.preheader, %for.body 1294 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] 1295 %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ] 1296 %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv 1297 store float %x.06, ptr %arrayidx, align 4 1298 %conv1 = fadd fast float %x.06, 5.000000e-01 1299 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1300 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1301 %exitcond = icmp eq i32 %lftr.wideiv, %N 1302 br i1 %exitcond, label %for.end.loopexit, label %for.body 1303 1304for.end.loopexit: ; preds = %for.body 1305 br label %for.end 1306 1307for.end: ; preds = %for.end.loopexit, %entry 1308 ret void 1309} 1310 1311 1312define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { 1313; VEC4_INTERL1-LABEL: @non_primary_iv_float_scalar( 1314; VEC4_INTERL1-NEXT: entry: 1315; VEC4_INTERL1-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1316; VEC4_INTERL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 4 1317; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1318; VEC4_INTERL1: vector.ph: 1319; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 1320; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1321; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 1322; VEC4_INTERL1: vector.body: 1323; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] 1324; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float 1325; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1326; VEC4_INTERL1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 1327; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer 1328; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0 1329; VEC4_INTERL1-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1330; VEC4_INTERL1: pred.store.if: 1331; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] 1332; VEC4_INTERL1-NEXT: store float [[DOTCAST2]], ptr [[TMP3]], align 4 1333; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE]] 1334; VEC4_INTERL1: pred.store.continue: 1335; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1 1336; VEC4_INTERL1-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] 1337; VEC4_INTERL1: pred.store.if2: 1338; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1 1339; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] 1340; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast float [[DOTCAST2]], 1.000000e+00 1341; VEC4_INTERL1-NEXT: store float [[TMP7]], ptr [[TMP6]], align 4 1342; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE3]] 1343; VEC4_INTERL1: pred.store.continue3: 1344; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2 1345; VEC4_INTERL1-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] 1346; VEC4_INTERL1: pred.store.if4: 1347; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 2 1348; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] 1349; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast float [[DOTCAST2]], 2.000000e+00 1350; VEC4_INTERL1-NEXT: store float [[TMP11]], ptr [[TMP10]], align 4 1351; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE5]] 1352; VEC4_INTERL1: pred.store.continue5: 1353; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3 1354; VEC4_INTERL1-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] 1355; VEC4_INTERL1: pred.store.if6: 1356; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 3 1357; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]] 1358; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast float [[DOTCAST2]], 3.000000e+00 1359; VEC4_INTERL1-NEXT: store float [[TMP15]], ptr [[TMP14]], align 4 1360; VEC4_INTERL1-NEXT: br label [[PRED_STORE_CONTINUE7]] 1361; VEC4_INTERL1: pred.store.continue7: 1362; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1363; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1364; VEC4_INTERL1-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1365; VEC4_INTERL1: middle.block: 1366; VEC4_INTERL1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 1367; VEC4_INTERL1-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1368; VEC4_INTERL1: scalar.ph: 1369; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1370; VEC4_INTERL1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[DOTCAST]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 1371; VEC4_INTERL1-NEXT: br label [[FOR_BODY:%.*]] 1372; VEC4_INTERL1: for.body: 1373; VEC4_INTERL1-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1374; VEC4_INTERL1-NEXT: [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1375; VEC4_INTERL1-NEXT: [[VAR0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[I]] 1376; VEC4_INTERL1-NEXT: [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4 1377; VEC4_INTERL1-NEXT: [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00 1378; VEC4_INTERL1-NEXT: br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]] 1379; VEC4_INTERL1: if.pred: 1380; VEC4_INTERL1-NEXT: store float [[J]], ptr [[VAR0]], align 4 1381; VEC4_INTERL1-NEXT: br label [[FOR_INC]] 1382; VEC4_INTERL1: for.inc: 1383; VEC4_INTERL1-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1384; VEC4_INTERL1-NEXT: [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00 1385; VEC4_INTERL1-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1386; VEC4_INTERL1-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] 1387; VEC4_INTERL1: for.end: 1388; VEC4_INTERL1-NEXT: ret void 1389; 1390; VEC4_INTERL2-LABEL: @non_primary_iv_float_scalar( 1391; VEC4_INTERL2-NEXT: entry: 1392; VEC4_INTERL2-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1393; VEC4_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 8 1394; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1395; VEC4_INTERL2: vector.ph: 1396; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800 1397; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1398; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1399; VEC4_INTERL2: vector.body: 1400; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] 1401; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float 1402; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1403; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 1404; VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 1405; VEC4_INTERL2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 1406; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer 1407; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD3]], zeroinitializer 1408; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0 1409; VEC4_INTERL2-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1410; VEC4_INTERL2: pred.store.if: 1411; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] 1412; VEC4_INTERL2-NEXT: store float [[DOTCAST2]], ptr [[TMP35]], align 4 1413; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE]] 1414; VEC4_INTERL2: pred.store.continue: 1415; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 1416; VEC4_INTERL2-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] 1417; VEC4_INTERL2: pred.store.if3: 1418; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1 1419; VEC4_INTERL2-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP7]] 1420; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = fadd fast float [[DOTCAST2]], 1.000000e+00 1421; VEC4_INTERL2-NEXT: store float [[TMP9]], ptr [[TMP8]], align 4 1422; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE4]] 1423; VEC4_INTERL2: pred.store.continue4: 1424; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 1425; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] 1426; VEC4_INTERL2: pred.store.if5: 1427; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 2 1428; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] 1429; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = fadd fast float [[DOTCAST2]], 2.000000e+00 1430; VEC4_INTERL2-NEXT: store float [[TMP13]], ptr [[TMP12]], align 4 1431; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE6]] 1432; VEC4_INTERL2: pred.store.continue6: 1433; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 1434; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] 1435; VEC4_INTERL2: pred.store.if7: 1436; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = or disjoint i64 [[INDEX]], 3 1437; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] 1438; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = fadd fast float [[DOTCAST2]], 3.000000e+00 1439; VEC4_INTERL2-NEXT: store float [[TMP17]], ptr [[TMP16]], align 4 1440; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE8]] 1441; VEC4_INTERL2: pred.store.continue8: 1442; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0 1443; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] 1444; VEC4_INTERL2: pred.store.if9: 1445; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 4 1446; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP19]] 1447; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = fadd fast float [[DOTCAST2]], 4.000000e+00 1448; VEC4_INTERL2-NEXT: store float [[TMP21]], ptr [[TMP20]], align 4 1449; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE10]] 1450; VEC4_INTERL2: pred.store.continue10: 1451; VEC4_INTERL2-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1 1452; VEC4_INTERL2-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] 1453; VEC4_INTERL2: pred.store.if11: 1454; VEC4_INTERL2-NEXT: [[TMP23:%.*]] = or disjoint i64 [[INDEX]], 5 1455; VEC4_INTERL2-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP23]] 1456; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = fadd fast float [[DOTCAST2]], 5.000000e+00 1457; VEC4_INTERL2-NEXT: store float [[TMP25]], ptr [[TMP24]], align 4 1458; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE12]] 1459; VEC4_INTERL2: pred.store.continue12: 1460; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2 1461; VEC4_INTERL2-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] 1462; VEC4_INTERL2: pred.store.if13: 1463; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or disjoint i64 [[INDEX]], 6 1464; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP27]] 1465; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = fadd fast float [[DOTCAST2]], 6.000000e+00 1466; VEC4_INTERL2-NEXT: store float [[TMP29]], ptr [[TMP28]], align 4 1467; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE14]] 1468; VEC4_INTERL2: pred.store.continue14: 1469; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3 1470; VEC4_INTERL2-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] 1471; VEC4_INTERL2: pred.store.if15: 1472; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 7 1473; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]] 1474; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = fadd fast float [[DOTCAST2]], 7.000000e+00 1475; VEC4_INTERL2-NEXT: store float [[TMP33]], ptr [[TMP32]], align 4 1476; VEC4_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE16]] 1477; VEC4_INTERL2: pred.store.continue16: 1478; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1479; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1480; VEC4_INTERL2-NEXT: br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1481; VEC4_INTERL2: middle.block: 1482; VEC4_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 1483; VEC4_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1484; VEC4_INTERL2: scalar.ph: 1485; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1486; VEC4_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[DOTCAST]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 1487; VEC4_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1488; VEC4_INTERL2: for.body: 1489; VEC4_INTERL2-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1490; VEC4_INTERL2-NEXT: [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1491; VEC4_INTERL2-NEXT: [[VAR0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[I]] 1492; VEC4_INTERL2-NEXT: [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4 1493; VEC4_INTERL2-NEXT: [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00 1494; VEC4_INTERL2-NEXT: br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]] 1495; VEC4_INTERL2: if.pred: 1496; VEC4_INTERL2-NEXT: store float [[J]], ptr [[VAR0]], align 4 1497; VEC4_INTERL2-NEXT: br label [[FOR_INC]] 1498; VEC4_INTERL2: for.inc: 1499; VEC4_INTERL2-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1500; VEC4_INTERL2-NEXT: [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00 1501; VEC4_INTERL2-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1502; VEC4_INTERL2-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] 1503; VEC4_INTERL2: for.end: 1504; VEC4_INTERL2-NEXT: ret void 1505; 1506; VEC1_INTERL2-LABEL: @non_primary_iv_float_scalar( 1507; VEC1_INTERL2-NEXT: entry: 1508; VEC1_INTERL2-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1509; VEC1_INTERL2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 2 1510; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1511; VEC1_INTERL2: vector.ph: 1512; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 1513; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1514; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1515; VEC1_INTERL2: vector.body: 1516; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] 1517; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1 1518; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float 1519; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1520; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] 1521; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP1]], align 4 1522; VEC1_INTERL2-NEXT: [[TMP4:%.*]] = load float, ptr [[TMP2]], align 4 1523; VEC1_INTERL2-NEXT: [[TMP5:%.*]] = fcmp fast oeq float [[TMP3]], 0.000000e+00 1524; VEC1_INTERL2-NEXT: [[TMP6:%.*]] = fcmp fast oeq float [[TMP4]], 0.000000e+00 1525; VEC1_INTERL2-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1526; VEC1_INTERL2: pred.store.if: 1527; VEC1_INTERL2-NEXT: store float [[DOTCAST2]], ptr [[TMP1]], align 4 1528; VEC1_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE]] 1529; VEC1_INTERL2: pred.store.continue: 1530; VEC1_INTERL2-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] 1531; VEC1_INTERL2: pred.store.if2: 1532; VEC1_INTERL2-NEXT: [[TMP7:%.*]] = fadd fast float [[DOTCAST2]], 1.000000e+00 1533; VEC1_INTERL2-NEXT: store float [[TMP7]], ptr [[TMP2]], align 4 1534; VEC1_INTERL2-NEXT: br label [[PRED_STORE_CONTINUE3]] 1535; VEC1_INTERL2: pred.store.continue3: 1536; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1537; VEC1_INTERL2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1538; VEC1_INTERL2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1539; VEC1_INTERL2: middle.block: 1540; VEC1_INTERL2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 1541; VEC1_INTERL2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1542; VEC1_INTERL2: scalar.ph: 1543; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1544; VEC1_INTERL2-NEXT: [[BC_RESUME_VAL1:%.*]] = phi float [ [[DOTCAST]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 1545; VEC1_INTERL2-NEXT: br label [[FOR_BODY:%.*]] 1546; VEC1_INTERL2: for.body: 1547; VEC1_INTERL2-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1548; VEC1_INTERL2-NEXT: [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ] 1549; VEC1_INTERL2-NEXT: [[VAR0:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[I]] 1550; VEC1_INTERL2-NEXT: [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4 1551; VEC1_INTERL2-NEXT: [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00 1552; VEC1_INTERL2-NEXT: br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]] 1553; VEC1_INTERL2: if.pred: 1554; VEC1_INTERL2-NEXT: store float [[J]], ptr [[VAR0]], align 4 1555; VEC1_INTERL2-NEXT: br label [[FOR_INC]] 1556; VEC1_INTERL2: for.inc: 1557; VEC1_INTERL2-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1558; VEC1_INTERL2-NEXT: [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00 1559; VEC1_INTERL2-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1560; VEC1_INTERL2-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] 1561; VEC1_INTERL2: for.end: 1562; VEC1_INTERL2-NEXT: ret void 1563; 1564; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar( 1565; VEC2_INTERL1_PRED_STORE-NEXT: entry: 1566; VEC2_INTERL1_PRED_STORE-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1) 1567; VEC2_INTERL1_PRED_STORE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 2 1568; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] 1569; VEC2_INTERL1_PRED_STORE: vector.ph: 1570; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 1571; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float 1572; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 1573; VEC2_INTERL1_PRED_STORE: vector.body: 1574; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] 1575; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[INDEX]] to float 1576; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 1577; VEC2_INTERL1_PRED_STORE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 1578; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer 1579; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0 1580; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] 1581; VEC2_INTERL1_PRED_STORE: pred.store.if: 1582; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] 1583; VEC2_INTERL1_PRED_STORE-NEXT: store float [[DOTCAST2]], ptr [[TMP3]], align 4 1584; VEC2_INTERL1_PRED_STORE-NEXT: br label [[PRED_STORE_CONTINUE]] 1585; VEC2_INTERL1_PRED_STORE: pred.store.continue: 1586; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1 1587; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] 1588; VEC2_INTERL1_PRED_STORE: pred.store.if2: 1589; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = or disjoint i64 [[INDEX]], 1 1590; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]] 1591; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast float [[DOTCAST2]], 1.000000e+00 1592; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP7]], ptr [[TMP6]], align 4 1593; VEC2_INTERL1_PRED_STORE-NEXT: br label [[PRED_STORE_CONTINUE3]] 1594; VEC2_INTERL1_PRED_STORE: pred.store.continue3: 1595; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1596; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1597; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] 1598; VEC2_INTERL1_PRED_STORE: middle.block: 1599; VEC2_INTERL1_PRED_STORE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]] 1600; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1601; VEC2_INTERL1_PRED_STORE: for.body: 1602; VEC2_INTERL1_PRED_STORE-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1603; VEC2_INTERL1_PRED_STORE-NEXT: [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[DOTCAST]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ] 1604; VEC2_INTERL1_PRED_STORE-NEXT: [[VAR0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[I]] 1605; VEC2_INTERL1_PRED_STORE-NEXT: [[VAR1:%.*]] = load float, ptr [[VAR0]], align 4 1606; VEC2_INTERL1_PRED_STORE-NEXT: [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00 1607; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]] 1608; VEC2_INTERL1_PRED_STORE: if.pred: 1609; VEC2_INTERL1_PRED_STORE-NEXT: store float [[J]], ptr [[VAR0]], align 4 1610; VEC2_INTERL1_PRED_STORE-NEXT: br label [[FOR_INC]] 1611; VEC2_INTERL1_PRED_STORE: for.inc: 1612; VEC2_INTERL1_PRED_STORE-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 1613; VEC2_INTERL1_PRED_STORE-NEXT: [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00 1614; VEC2_INTERL1_PRED_STORE-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] 1615; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]] 1616; VEC2_INTERL1_PRED_STORE: for.end: 1617; VEC2_INTERL1_PRED_STORE-NEXT: ret void 1618; 1619entry: 1620 br label %for.body 1621 1622for.body: 1623 %i = phi i64 [ %i.next, %for.inc ], [ 0, %entry ] 1624 %j = phi float [ %j.next, %for.inc ], [ 0.0, %entry ] 1625 %var0 = getelementptr inbounds float, ptr %A, i64 %i 1626 %var1 = load float, ptr %var0, align 4 1627 %var2 = fcmp fast oeq float %var1, 0.0 1628 br i1 %var2, label %if.pred, label %for.inc 1629 1630if.pred: 1631 store float %j, ptr %var0, align 4 1632 br label %for.inc 1633 1634for.inc: 1635 %i.next = add nuw nsw i64 %i, 1 1636 %j.next = fadd fast float %j, 1.0 1637 %cond = icmp slt i64 %i.next, %N 1638 br i1 %cond, label %for.body, label %for.end 1639 1640for.end: 1641 ret void 1642} 1643 1644define i32 @float_induction_with_dbg_on_fadd(ptr %dst) { 1645; VEC4_INTERL1-LABEL: @float_induction_with_dbg_on_fadd( 1646; VEC4_INTERL1-NEXT: entry: 1647; VEC4_INTERL1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1648; VEC4_INTERL1: vector.ph: 1649; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] 1650; VEC4_INTERL1: vector.body: 1651; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1652; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = getelementptr float, ptr null, i64 [[INDEX]] 1653; VEC4_INTERL1-NEXT: store <4 x float> poison, ptr [[TMP0]], align 8 1654; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1655; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1656; VEC4_INTERL1-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1657; VEC4_INTERL1: middle.block: 1658; VEC4_INTERL1-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] 1659; VEC4_INTERL1: scalar.ph: 1660; VEC4_INTERL1-NEXT: br label [[LOOP:%.*]] 1661; VEC4_INTERL1: loop: 1662; VEC4_INTERL1-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] 1663; VEC4_INTERL1: exit: 1664; VEC4_INTERL1-NEXT: ret i32 0 1665; 1666; VEC4_INTERL2-LABEL: @float_induction_with_dbg_on_fadd( 1667; VEC4_INTERL2-NEXT: entry: 1668; VEC4_INTERL2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1669; VEC4_INTERL2: vector.ph: 1670; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1671; VEC4_INTERL2: vector.body: 1672; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1673; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = getelementptr float, ptr null, i64 [[INDEX]] 1674; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 1675; VEC4_INTERL2-NEXT: store <4 x float> poison, ptr [[TMP0]], align 8 1676; VEC4_INTERL2-NEXT: store <4 x float> zeroinitializer, ptr [[TMP1]], align 8 1677; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 1678; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1679; VEC4_INTERL2-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1680; VEC4_INTERL2: middle.block: 1681; VEC4_INTERL2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] 1682; VEC4_INTERL2: scalar.ph: 1683; VEC4_INTERL2-NEXT: br label [[LOOP:%.*]] 1684; VEC4_INTERL2: loop: 1685; VEC4_INTERL2-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] 1686; VEC4_INTERL2: exit: 1687; VEC4_INTERL2-NEXT: ret i32 0 1688; 1689; VEC1_INTERL2-LABEL: @float_induction_with_dbg_on_fadd( 1690; VEC1_INTERL2-NEXT: entry: 1691; VEC1_INTERL2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1692; VEC1_INTERL2: vector.ph: 1693; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] 1694; VEC1_INTERL2: vector.body: 1695; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1696; VEC1_INTERL2-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1 1697; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = getelementptr float, ptr null, i64 [[INDEX]] 1698; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr float, ptr null, i64 [[TMP0]] 1699; VEC1_INTERL2-NEXT: store float poison, ptr [[TMP1]], align 8 1700; VEC1_INTERL2-NEXT: store float poison, ptr [[TMP2]], align 8 1701; VEC1_INTERL2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1702; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1703; VEC1_INTERL2-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1704; VEC1_INTERL2: middle.block: 1705; VEC1_INTERL2-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] 1706; VEC1_INTERL2: scalar.ph: 1707; VEC1_INTERL2-NEXT: br label [[LOOP:%.*]] 1708; VEC1_INTERL2: loop: 1709; VEC1_INTERL2-NEXT: br i1 poison, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] 1710; VEC1_INTERL2: exit: 1711; VEC1_INTERL2-NEXT: ret i32 0 1712; 1713; VEC2_INTERL1_PRED_STORE-LABEL: @float_induction_with_dbg_on_fadd( 1714; VEC2_INTERL1_PRED_STORE-NEXT: entry: 1715; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] 1716; VEC2_INTERL1_PRED_STORE: vector.body: 1717; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1718; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = getelementptr float, ptr null, i64 [[INDEX]] 1719; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> poison, ptr [[TMP0]], align 8 1720; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 1721; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 1722; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 1723; VEC2_INTERL1_PRED_STORE: exit: 1724; VEC2_INTERL1_PRED_STORE-NEXT: ret i32 0 1725; 1726entry: 1727 br label %loop 1728 1729loop: 1730 %fp.iv = phi float [ 0.000000e+00, %entry ], [ %fp.iv.next, %loop ], !dbg !4 1731 %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] 1732 %fp.iv.next = fadd reassoc float %fp.iv, 0.000000e+00 1733 %gep = getelementptr float, ptr null, i64 %iv 1734 store float %fp.iv.next, ptr %gep, align 8 1735 %iv.next = add i64 %iv, 1 1736 %exitcond.not = icmp eq i64 %iv.next, 200 1737 br i1 %exitcond.not, label %exit, label %loop 1738 1739exit: 1740 ret i32 0 1741} 1742 1743!llvm.module.flags = !{!3} 1744 1745!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1) 1746!1 = !DIFile(filename: "bbi-99425.c", directory: "/tmp") 1747!2 = !{} 1748!3 = !{i32 2, !"Debug Info Version", i32 3} 1749!4 = !DILocation(line: 5, column: 12, scope: !8) 1750!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 3, type: !9, unit: !0, retainedNodes: !2) 1751!9 = !DISubroutineType(types: !2) 1752