; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s

; This file checks that the loop vectorizer recognizes integer reduction
; idioms (add, mul, and, or, xor) over i32 loops and, with a forced VF of 4,
; emits a <4 x i32> vector loop whose partial results are combined by an
; @llvm.vector.reduce.* intrinsic in the middle block, followed by a scalar
; epilogue loop resumed from bc.merge.rdx.

; Sum reduction: sum += i + A[i] + B[i]. Expect a chain of vector adds
; feeding @llvm.vector.reduce.add, with a zeroinitializer reduction phi.
define i32 @reduction_sum(i32 %n, ptr %A, ptr %B) {
; CHECK-LABEL: define i32 @reduction_sum(
; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]]
; CHECK: .lr.ph.preheader:
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]]
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP17:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[SUM_02]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], [[TMP11]]
; CHECK-NEXT: [[TMP17]] = add i32 [[TMP16]], [[TMP13]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: ._crit_edge.loopexit:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[DOTLR_PH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %3 = load i32, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %5 = load i32, ptr %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = add i32 %sum.02, %6
  %8 = add i32 %7, %3
  %9 = add i32 %8, %5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Product reduction: prod *= i * A[i] * B[i], starting at 1. Expect vector
; muls feeding @llvm.vector.reduce.mul, with the identity splat (i32 1) as
; the reduction phi start value.
define i32 @reduction_prod(i32 %n, ptr %A, ptr %B) {
; CHECK-LABEL: define i32 @reduction_prod(
; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]]
; CHECK: .lr.ph.preheader:
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[VEC_PHI]], [[VEC_IND]]
; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP7]] = mul <4 x i32> [[TMP6]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 1, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[TMP17:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[PROD_02]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], [[TMP11]]
; CHECK-NEXT: [[TMP17]] = mul i32 [[TMP16]], [[TMP13]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: ._crit_edge.loopexit:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[DOTLR_PH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ 1, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]]
;
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ]
  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %3 = load i32, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %5 = load i32, ptr %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = mul i32 %prod.02, %6
  %8 = mul i32 %7, %3
  %9 = mul i32 %8, %5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ]
  ret i32 %prod.0.lcssa
}

; Mixed reduction: sum += i + A[i]*B[i]. The mul feeds the add-reduction
; chain; expect @llvm.vector.reduce.add as in @reduction_sum.
define i32 @reduction_mix(i32 %n, ptr %A, ptr %B) {
; CHECK-LABEL: define i32 @reduction_mix(
; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]]
; CHECK: .lr.ph.preheader:
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]]
; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP17:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = mul nsw i32 [[TMP13]], [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[SUM_02]], [[TMP15]]
; CHECK-NEXT: [[TMP17]] = add i32 [[TMP16]], [[TMP14]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: ._crit_edge.loopexit:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[DOTLR_PH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %3 = load i32, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %5 = load i32, ptr %4, align 4
  %6 = mul nsw i32 %5, %3
  %7 = trunc i64 %indvars.iv to i32
  %8 = add i32 %sum.02, %7
  %9 = add i32 %8, %6
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Mul reduction with a non-identity start value (19): the start value must
; appear only in lane 0 of the reduction phi (<i32 19, i32 1, i32 1, i32 1>)
; so the other lanes use the mul identity 1.
define i32 @reduction_mul(i32 %n, ptr %A, ptr %B) {
; CHECK-LABEL: define i32 @reduction_mul(
; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]]
; CHECK: .lr.ph.preheader:
; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 19, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_IND]]
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP7]] = mul <4 x i32> [[TMP6]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP7]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 19, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP17:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP11]], [[TMP14]]
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], [[TMP13]]
; CHECK-NEXT: [[TMP17]] = mul i32 [[TMP16]], [[SUM_02]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: ._crit_edge.loopexit:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[DOTLR_PH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ]
  %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %3 = load i32, ptr %2, align 4
  %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %5 = load i32, ptr %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = add i32 %3, %6
  %8 = add i32 %7, %5
  %9 = mul i32 %8, %sum.02
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Dot-product style add reduction with a non-zero start value (120):
; sum += in[i]*coeff[i]. The start value lands in lane 0 of the vector phi
; (<i32 120, i32 0, i32 0, i32 0>). Note %out is unused by the body.
define i32 @start_at_non_zero(ptr %in, ptr %coeff, ptr %out, i32 %n) {
; CHECK-LABEL: define i32 @start_at_non_zero(
; CHECK-SAME: ptr [[IN:%.*]], ptr [[COEFF:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 120, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COEFF]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 120, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[SUM_09:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[COEFF]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[SUM_09]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 120, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %in, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %coeff, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx2, align 4
  %mul = mul nsw i32 %1, %0
  %add = add nsw i32 %mul, %sum.09
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ]
  ret i32 %sum.0.lcssa
}

; Bitwise-and reduction starting at -1 (the 'and' identity):
; result &= A[i] + B[i]. Expect splat (i32 -1) phi and @llvm.vector.reduce.and.
define i32 @reduction_and(i32 %n, ptr %A, ptr %B) {
; CHECK-LABEL: define i32 @reduction_and(
; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4]] = and <4 x i32> [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ -1, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[AND]] = and i32 [[ADD]], [[RESULT_08]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[AND_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %and = and i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ]
  ret i32 %result.0.lcssa
}

; Bitwise-or reduction starting at 0 (the 'or' identity):
; result |= A[i] + B[i]. Expect zeroinitializer phi and @llvm.vector.reduce.or.
define i32 @reduction_or(i32 %n, ptr %A, ptr %B) {
; CHECK-LABEL: define i32 @reduction_or(
; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4]] = or <4 x i32> [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OR_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %or = or i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ]
  ret i32 %result.0.lcssa
}

; Xor reduction starting at 0 (the 'xor' identity):
; result ^= A[i] + B[i]. Expect zeroinitializer phi and @llvm.vector.reduce.xor.
define i32 @reduction_xor(i32 %n, ptr %A, ptr %B) {
; CHECK-LABEL: define i32 @reduction_xor(
; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP4]] = xor <4 x i32> [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[XOR]] = xor i32 [[ADD]], [[RESULT_08]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[XOR_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]]
;
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  %xor = xor i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
  ret i32 %result.0.lcssa
}

; In this code the subtracted variable is on the RHS and this is not an induction variable.
633define i32 @reduction_sub_rhs(i32 %n, ptr %A) { 634; CHECK-LABEL: define i32 @reduction_sub_rhs( 635; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]]) { 636; CHECK-NEXT: entry: 637; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0 638; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 639; CHECK: for.body.preheader: 640; CHECK-NEXT: br label [[FOR_BODY:%.*]] 641; CHECK: for.body: 642; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 643; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 644; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] 645; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 646; CHECK-NEXT: [[SUB]] = sub nsw i32 [[TMP0]], [[X_05]] 647; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 648; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 649; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 650; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]] 651; CHECK: for.end.loopexit: 652; CHECK-NEXT: br label [[FOR_END]] 653; CHECK: for.end: 654; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUB]], [[FOR_END_LOOPEXIT]] ] 655; CHECK-NEXT: ret i32 [[X_0_LCSSA]] 656; 657entry: 658 %cmp4 = icmp sgt i32 %n, 0 659 br i1 %cmp4, label %for.body, label %for.end 660 661for.body: ; preds = %entry, %for.body 662 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 663 %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ] 664 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 665 %0 = load i32, ptr %arrayidx, align 4 666 %sub = sub nsw i32 %0, %x.05 667 %indvars.iv.next = add i64 %indvars.iv, 1 668 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 669 %exitcond = icmp eq i32 %lftr.wideiv, %n 670 br i1 %exitcond, label %for.end, label %for.body 671 
672for.end: ; preds = %for.body, %entry 673 %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ] 674 ret i32 %x.0.lcssa 675} 676 677 678; In this test the reduction variable is on the LHS and we can vectorize it. 679define i32 @reduction_sub_lhs(i32 %n, ptr %A) { 680; CHECK-LABEL: define i32 @reduction_sub_lhs( 681; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]]) { 682; CHECK-NEXT: entry: 683; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N]], 0 684; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 685; CHECK: for.body.preheader: 686; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[N]] to i64 687; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 688; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 689; CHECK: vector.ph: 690; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 691; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 692; CHECK: vector.body: 693; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 694; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] 695; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] 696; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 697; CHECK-NEXT: [[TMP2]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 698; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 699; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 700; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 701; CHECK: middle.block: 702; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) 703; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]] 704; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 705; CHECK: scalar.ph: 706; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], 
[[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 707; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 708; CHECK-NEXT: br label [[FOR_BODY:%.*]] 709; CHECK: for.body: 710; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 711; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 712; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] 713; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 714; CHECK-NEXT: [[SUB]] = sub nsw i32 [[X_05]], [[TMP5]] 715; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 716; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 717; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 718; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 719; CHECK: for.end.loopexit: 720; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] 721; CHECK-NEXT: br label [[FOR_END]] 722; CHECK: for.end: 723; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUB_LCSSA]], [[FOR_END_LOOPEXIT]] ] 724; CHECK-NEXT: ret i32 [[X_0_LCSSA]] 725; 726entry: 727 %cmp4 = icmp sgt i32 %n, 0 728 br i1 %cmp4, label %for.body, label %for.end 729 730for.body: ; preds = %entry, %for.body 731 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 732 %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ] 733 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 734 %0 = load i32, ptr %arrayidx, align 4 735 %sub = sub nsw i32 %x.05, %0 736 %indvars.iv.next = add i64 %indvars.iv, 1 737 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 738 %exitcond = icmp eq i32 %lftr.wideiv, %n 739 br i1 %exitcond, label %for.end, label %for.body 740 741for.end: ; preds = %for.body, %entry 742 
%x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ] 743 ret i32 %x.0.lcssa 744} 745 746; We can vectorize conditional reductions with multi-input phis. 747define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) { 748; CHECK-LABEL: define float @reduction_conditional( 749; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], float [[S:%.*]]) { 750; CHECK-NEXT: entry: 751; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 752; CHECK: vector.ph: 753; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[S]], i64 0 754; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 755; CHECK: vector.body: 756; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 757; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ] 758; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] 759; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 760; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] 761; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 762; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] 763; CHECK-NEXT: [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) 764; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]] 765; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00) 766; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] 767; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) 768; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]] 769; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) 770; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] 771; 
CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] 772; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] 773; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] 774; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 775; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 776; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 777; CHECK: middle.block: 778; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]]) 779; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 780; CHECK: scalar.ph: 781; CHECK-NEXT: br label [[FOR_BODY:%.*]] 782; CHECK: for.body: 783; CHECK-NEXT: br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] 784; CHECK: if.then: 785; CHECK-NEXT: br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] 786; CHECK: if.then8: 787; CHECK-NEXT: br label [[FOR_INC]] 788; CHECK: if.else: 789; CHECK-NEXT: br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]] 790; CHECK: if.then16: 791; CHECK-NEXT: br label [[FOR_INC]] 792; CHECK: for.inc: 793; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP21:![0-9]+]] 794; CHECK: for.end: 795; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] 796; CHECK-NEXT: ret float [[SUM_1_LCSSA]] 797; 798entry: 799 br label %for.body 800 801for.body: 802 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 803 %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] 804 %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv 805 %0 = load float, ptr %arrayidx, align 4 806 %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv 807 %1 = load float, ptr %arrayidx2, align 4 808 %cmp3 = fcmp ogt float %0, %1 809 br i1 %cmp3, label 
%if.then, label %for.inc 810 811if.then: 812 %cmp6 = fcmp ogt float %1, 1.000000e+00 813 br i1 %cmp6, label %if.then8, label %if.else 814 815if.then8: 816 %add = fadd fast float %sum.033, %0 817 br label %for.inc 818 819if.else: 820 %cmp14 = fcmp ogt float %0, 2.000000e+00 821 br i1 %cmp14, label %if.then16, label %for.inc 822 823if.then16: 824 %add19 = fadd fast float %sum.033, %1 825 br label %for.inc 826 827for.inc: 828 %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ] 829 %indvars.iv.next = add i64 %indvars.iv, 1 830 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 831 %exitcond = icmp ne i32 %lftr.wideiv, 128 832 br i1 %exitcond, label %for.body, label %for.end 833 834for.end: 835 %sum.1.lcssa = phi float [ %sum.1, %for.inc ] 836 ret float %sum.1.lcssa 837} 838 839; We can't vectorize reductions with phi inputs from outside the reduction. 840define float @noreduction_phi(ptr %A, ptr %B, ptr %C, float %S) { 841; CHECK-LABEL: define float @noreduction_phi( 842; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], float [[S:%.*]]) { 843; CHECK-NEXT: entry: 844; CHECK-NEXT: br label [[FOR_BODY:%.*]] 845; CHECK: for.body: 846; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 847; CHECK-NEXT: [[SUM_033:%.*]] = phi float [ [[S]], [[ENTRY]] ], [ [[SUM_1:%.*]], [[FOR_INC]] ] 848; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]] 849; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 850; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] 851; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 852; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[TMP0]], [[TMP1]] 853; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN:%.*]], label [[FOR_INC]] 854; CHECK: if.then: 855; CHECK-NEXT: [[CMP6:%.*]] = fcmp ogt float [[TMP1]], 1.000000e+00 856; CHECK-NEXT: 
br i1 [[CMP6]], label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] 857; CHECK: if.then8: 858; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[SUM_033]], [[TMP0]] 859; CHECK-NEXT: br label [[FOR_INC]] 860; CHECK: if.else: 861; CHECK-NEXT: [[CMP14:%.*]] = fcmp ogt float [[TMP0]], 2.000000e+00 862; CHECK-NEXT: br i1 [[CMP14]], label [[IF_THEN16:%.*]], label [[FOR_INC]] 863; CHECK: if.then16: 864; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[SUM_033]], [[TMP1]] 865; CHECK-NEXT: br label [[FOR_INC]] 866; CHECK: for.inc: 867; CHECK-NEXT: [[SUM_1]] = phi float [ [[ADD]], [[IF_THEN8]] ], [ [[ADD19]], [[IF_THEN16]] ], [ 0.000000e+00, [[IF_ELSE]] ], [ [[SUM_033]], [[FOR_BODY]] ] 868; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 869; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 870; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 128 871; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 872; CHECK: for.end: 873; CHECK-NEXT: ret float [[SUM_1]] 874; 875entry: 876 br label %for.body 877 878for.body: 879 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 880 %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] 881 %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv 882 %0 = load float, ptr %arrayidx, align 4 883 %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv 884 %1 = load float, ptr %arrayidx2, align 4 885 %cmp3 = fcmp ogt float %0, %1 886 br i1 %cmp3, label %if.then, label %for.inc 887 888if.then: 889 %cmp6 = fcmp ogt float %1, 1.000000e+00 890 br i1 %cmp6, label %if.then8, label %if.else 891 892if.then8: 893 %add = fadd fast float %sum.033, %0 894 br label %for.inc 895 896if.else: 897 %cmp14 = fcmp ogt float %0, 2.000000e+00 898 br i1 %cmp14, label %if.then16, label %for.inc 899 900if.then16: 901 %add19 = fadd fast float %sum.033, %1 902 br label %for.inc 903 904for.inc: 905 %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ 
0.000000e+00, %if.else ], [ %sum.033, %for.body ] 906 %indvars.iv.next = add i64 %indvars.iv, 1 907 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 908 %exitcond = icmp ne i32 %lftr.wideiv, 128 909 br i1 %exitcond, label %for.body, label %for.end 910 911for.end: 912 %sum.1.lcssa = phi float [ %sum.1, %for.inc ] 913 ret float %sum.1.lcssa 914} 915 916; We can't vectorize reductions that feed another header PHI. 917define float @noredux_header_phi(ptr %A, ptr %B, ptr %C, float %S) { 918; CHECK-LABEL: define float @noredux_header_phi( 919; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], float [[S:%.*]]) { 920; CHECK-NEXT: entry: 921; CHECK-NEXT: br label [[FOR_BODY:%.*]] 922; CHECK: for.body: 923; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 924; CHECK-NEXT: [[SUM2_09:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[ADD1:%.*]], [[FOR_BODY]] ] 925; CHECK-NEXT: [[SUM_08:%.*]] = phi float [ [[S]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] 926; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDVARS_IV]] 927; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 928; CHECK-NEXT: [[ADD]] = fadd fast float [[SUM_08]], [[TMP0]] 929; CHECK-NEXT: [[ADD1]] = fadd fast float [[SUM2_09]], [[ADD]] 930; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 931; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 932; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 128 933; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] 934; CHECK: for.end: 935; CHECK-NEXT: [[ADD2:%.*]] = fadd fast float [[ADD]], [[ADD1]] 936; CHECK-NEXT: ret float [[ADD2]] 937; 938entry: 939 br label %for.body 940 941for.body: 942 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 943 %sum2.09 = phi float [ 0.000000e+00, %entry ], [ %add1, %for.body ] 944 %sum.08 = phi float [ %S, %entry ], [ %add, %for.body ] 945 
%arrayidx = getelementptr inbounds float, ptr %B, i64 %indvars.iv 946 %0 = load float, ptr %arrayidx, align 4 947 %add = fadd fast float %sum.08, %0 948 %add1 = fadd fast float %sum2.09, %add 949 %indvars.iv.next = add i64 %indvars.iv, 1 950 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 951 %exitcond = icmp ne i32 %lftr.wideiv, 128 952 br i1 %exitcond, label %for.body, label %for.end 953 954for.end: 955 %add1.lcssa = phi float [ %add1, %for.body ] 956 %add.lcssa = phi float [ %add, %for.body ] 957 %add2 = fadd fast float %add.lcssa, %add1.lcssa 958 ret float %add2 959} 960 961 962; When vectorizing a reduction whose loop header phi value is used outside the 963; loop special care must be taken. Otherwise, the reduced value feeding into the 964; outside user misses a few iterations (VF-1) of the loop. 965; PR16522 966define i32 @phivalueredux(i32 %p) { 967; CHECK-LABEL: define i32 @phivalueredux( 968; CHECK-SAME: i32 [[P:%.*]]) { 969; CHECK-NEXT: entry: 970; CHECK-NEXT: br label [[FOR_BODY:%.*]] 971; CHECK: for.body: 972; CHECK-NEXT: [[T_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] 973; CHECK-NEXT: [[P_ADDR_02:%.*]] = phi i32 [ [[P]], [[ENTRY]] ], [ [[XOR:%.*]], [[FOR_BODY]] ] 974; CHECK-NEXT: [[XOR]] = xor i32 [[P_ADDR_02]], -1 975; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[T_03]], 1 976; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 16 977; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] 978; CHECK: for.end: 979; CHECK-NEXT: ret i32 [[P_ADDR_02]] 980; 981entry: 982 br label %for.body 983 984for.body: 985 %t.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ] 986 %p.addr.02 = phi i32 [ %p, %entry ], [ %xor, %for.body ] 987 %xor = xor i32 %p.addr.02, -1 988 %inc = add nsw i32 %t.03, 1 989 %exitcond = icmp eq i32 %inc, 16 990 br i1 %exitcond, label %for.end, label %for.body 991 992for.end: 993 ret i32 %p.addr.02 994} 995 996; Don't vectorize a reduction value that is not the last in a reduction cyle. 
We 997; would loose iterations (VF-1) on the operations after that use. 998; PR17498 999define i32 @not_last_operation(i32 %p, i32 %val) { 1000; CHECK-LABEL: define i32 @not_last_operation( 1001; CHECK-SAME: i32 [[P:%.*]], i32 [[VAL:%.*]]) { 1002; CHECK-NEXT: entry: 1003; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[P]], 0 1004; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1005; CHECK: for.body: 1006; CHECK-NEXT: [[INC613_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC6_1:%.*]], [[FOR_BODY]] ] 1007; CHECK-NEXT: [[INC511_1:%.*]] = phi i32 [ [[VAL]], [[ENTRY]] ], [ [[INC5_1:%.*]], [[FOR_BODY]] ] 1008; CHECK-NEXT: [[INC4_1:%.*]] = zext i1 [[TOBOOL]] to i32 1009; CHECK-NEXT: [[INC511_1_INC4_1:%.*]] = add nsw i32 [[INC511_1]], [[INC4_1]] 1010; CHECK-NEXT: [[INC5_1]] = add nsw i32 [[INC511_1_INC4_1]], 1 1011; CHECK-NEXT: [[INC6_1]] = add nuw nsw i32 [[INC613_1]], 1 1012; CHECK-NEXT: [[EXITCOND_1:%.*]] = icmp eq i32 [[INC6_1]], 22 1013; CHECK-NEXT: br i1 [[EXITCOND_1]], label [[EXIT:%.*]], label [[FOR_BODY]] 1014; CHECK: exit: 1015; CHECK-NEXT: [[INC_2:%.*]] = add nsw i32 [[INC511_1_INC4_1]], 2 1016; CHECK-NEXT: ret i32 [[INC_2]] 1017; 1018entry: 1019 %tobool = icmp eq i32 %p, 0 1020 br label %for.body 1021 1022for.body: 1023 %inc613.1 = phi i32 [ 0, %entry ], [ %inc6.1, %for.body ] 1024 %inc511.1 = phi i32 [ %val, %entry ], [ %inc5.1, %for.body ] 1025 %0 = zext i1 %tobool to i32 1026 %inc4.1 = xor i32 %0, 1 1027 %inc511.1.inc4.1 = add nsw i32 %inc511.1, %inc4.1 1028 %inc5.1 = add nsw i32 %inc511.1.inc4.1, 1 1029 %inc6.1 = add nsw i32 %inc613.1, 1 1030 %exitcond.1 = icmp eq i32 %inc6.1, 22 1031 br i1 %exitcond.1, label %exit, label %for.body 1032 1033exit: 1034 %inc.2 = add nsw i32 %inc511.1.inc4.1, 2 1035 ret i32 %inc.2 1036} 1037 1038define i32 @reduction_sum_multiuse(i32 %n, ptr %A, ptr %B) { 1039; CHECK-LABEL: define i32 @reduction_sum_multiuse( 1040; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { 1041; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N]], 0 1042; 
CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[END:%.*]] 1043; CHECK: .lr.ph.preheader: 1044; CHECK-NEXT: [[TMP2:%.*]] = zext nneg i32 [[N]] to i64 1045; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 1046; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1047; CHECK: vector.ph: 1048; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 2147483644 1049; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1050; CHECK: vector.body: 1051; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1052; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] 1053; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1054; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] 1055; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 1056; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] 1057; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 1058; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]] 1059; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD]] 1060; CHECK-NEXT: [[TMP7]] = add <4 x i32> [[TMP6]], [[WIDE_LOAD1]] 1061; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1062; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 1063; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 1064; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1065; CHECK: middle.block: 1066; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]]) 1067; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP2]] 1068; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 1069; CHECK: 
scalar.ph: 1070; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] 1071; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] 1072; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 1073; CHECK: .lr.ph: 1074; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 1075; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP17:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 1076; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] 1077; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 1078; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] 1079; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 1080; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV]] to i32 1081; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[SUM_02]], [[TMP14]] 1082; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP15]], [[TMP11]] 1083; CHECK-NEXT: [[TMP17]] = add i32 [[TMP16]], [[TMP13]] 1084; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 1085; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 1086; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[N]], [[LFTR_WIDEIV]] 1087; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP23:![0-9]+]] 1088; CHECK: ._crit_edge: 1089; CHECK-NEXT: [[SUM_COPY:%.*]] = phi i32 [ [[TMP17]], [[DOTLR_PH]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] 1090; CHECK-NEXT: [[TMP18:%.*]] = shl i32 [[SUM_COPY]], 1 1091; CHECK-NEXT: br label [[END]] 1092; CHECK: end: 1093; CHECK-NEXT: [[F2:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP18]], [[DOT_CRIT_EDGE]] ] 1094; CHECK-NEXT: ret i32 [[F2]] 1095; 1096 %1 = icmp sgt i32 %n, 0 1097 br i1 %1, label %.lr.ph.preheader, label %end 1098.lr.ph.preheader: ; preds = %0 1099 br label %.lr.ph 1100 1101.lr.ph: ; preds = %0, %.lr.ph 1102 
%indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] 1103 %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ] 1104 %2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 1105 %3 = load i32, ptr %2, align 4 1106 %4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv 1107 %5 = load i32, ptr %4, align 4 1108 %6 = trunc i64 %indvars.iv to i32 1109 %7 = add i32 %sum.02, %6 1110 %8 = add i32 %7, %3 1111 %9 = add i32 %8, %5 1112 %indvars.iv.next = add i64 %indvars.iv, 1 1113 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1114 %exitcond = icmp eq i32 %lftr.wideiv, %n 1115 br i1 %exitcond, label %._crit_edge, label %.lr.ph 1116 1117._crit_edge: ; preds = %.lr.ph, %0 1118 %sum.lcssa = phi i32 [ %9, %.lr.ph ] 1119 %sum.copy = phi i32 [ %9, %.lr.ph ] 1120 br label %end 1121 1122end: 1123 %f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ] 1124 %f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ] 1125 %final = add i32 %f1, %f2 1126 ret i32 %final 1127} 1128 1129; This looks like a predicated reduction, but it is a reset of the reduction 1130; variable. We cannot vectorize this. 
define void @reduction_reset(i32 %N, ptr %arrayA, ptr %arrayB) {
; CHECK-LABEL: define void @reduction_reset(
; CHECK-SAME: i32 [[N:%.*]], ptr [[ARRAYA:%.*]], ptr [[ARRAYB:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[C4:%.*]] = icmp sgt i32 [[N]], 0
; CHECK-NEXT:    br i1 [[C4]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]]
; CHECK:       .lr.ph.preheader:
; CHECK-NEXT:    [[C5:%.*]] = add nsw i32 [[N]], -1
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[DOTLR_PH_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ]
; CHECK-NEXT:    [[DOT017:%.*]] = phi i32 [ 100, [[DOTLR_PH_PREHEADER]] ], [ [[CSEL:%.*]], [[DOTLR_PH]] ]
; CHECK-NEXT:    [[C6:%.*]] = getelementptr inbounds nuw i32, ptr [[ARRAYA]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[C7:%.*]] = load i32, ptr [[C6]], align 4
; CHECK-NEXT:    [[C8:%.*]] = icmp sgt i32 [[C7]], 0
; CHECK-NEXT:    [[C9:%.*]] = add nsw i32 [[C7]], [[DOT017]]
; CHECK-NEXT:    [[CSEL]] = select i1 [[C8]], i32 [[C9]], i32 0
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[DOTLR_PH]]
; CHECK:       ._crit_edge.loopexit:
; CHECK-NEXT:    [[PHITMP19:%.*]] = sext i32 [[C5]] to i64
; CHECK-NEXT:    br label [[DOT_CRIT_EDGE]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[DOT015_LCSSA:%.*]] = phi i64 [ -1, [[ENTRY:%.*]] ], [ [[PHITMP19]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT:    [[DOT0_LCSSA:%.*]] = phi i32 [ 100, [[ENTRY]] ], [ [[CSEL]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT:    [[C10:%.*]] = getelementptr inbounds i32, ptr [[ARRAYB]], i64 [[DOT015_LCSSA]]
; CHECK-NEXT:    store i32 [[DOT0_LCSSA]], ptr [[C10]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %c4 = icmp sgt i32 %N, 0
  br i1 %c4, label %.lr.ph.preheader, label %._crit_edge

.lr.ph.preheader:                                 ; preds = %entry
  %c5 = add i32 %N, -1
  %wide.trip.count = zext i32 %N to i64
  br label %.lr.ph

; Guarded loop accumulating into %.017 (initial value 100). It superficially
; matches a predicated (select-based) reduction, but it is not one -- see the
; comment on %csel below. The autogenerated CHECK block above accordingly
; contains no vector.body / vector.ph: the loop must be left scalar.
.lr.ph:                                           ; preds = %.lr.ph, %.lr.ph.preheader
  %indvars.iv = phi i64 [ 0, %.lr.ph.preheader ], [ %indvars.iv.next, %.lr.ph ]
  %.017 = phi i32 [ 100, %.lr.ph.preheader ], [ %csel, %.lr.ph ]
  %c6 = getelementptr inbounds i32, ptr %arrayA, i64 %indvars.iv
  %c7 = load i32, ptr %c6, align 4
  %c8 = icmp sgt i32 %c7, 0
  %c9 = add nsw i32 %c7, %.017
  ; A true predicated reduction would select between the updated value (%c9)
  ; and the unmodified phi (%.017). Here the false arm is the constant 0, i.e.
  ; a *reset* of the running value, so %.017 is not a reduction variable.
  %csel = select i1 %c8, i32 %c9, i32 0
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %._crit_edge.loopexit, label %.lr.ph

._crit_edge.loopexit:                             ; preds = %.lr.ph
  %csel.lcssa = phi i32 [ %csel, %.lr.ph ]
  %phitmp19 = sext i32 %c5 to i64
  br label %._crit_edge

._crit_edge:                                      ; preds = %._crit_edge.loopexit, %entry
  %.015.lcssa = phi i64 [ -1, %entry ], [ %phitmp19, %._crit_edge.loopexit ]
  %.0.lcssa = phi i32 [ 100, %entry ], [ %csel.lcssa, %._crit_edge.loopexit ]
  ; Store the final running value to arrayB[N-1] (index -1 when the loop was
  ; never entered), keeping the loop result live outside the loop.
  %c10 = getelementptr inbounds i32, ptr %arrayB, i64 %.015.lcssa
  store i32 %.0.lcssa, ptr %c10, align 4
  ret void
}

; Can vectorize reduction with redundant single-operand phi input.
; The reduction update %sum.next is routed through %phi.sum.next in %loop.bb,
; a phi with a single incoming value, before reaching the header phi on the
; backedge. The checks below show the loop is still vectorized (vector.body
; with a <4 x i64> add reduction and a vector.reduce.add in middle.block).
define i64 @reduction_with_phi_with_one_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-LABEL: define i64 @reduction_with_phi_with_one_incoming_on_backedge(
; CHECK-SAME: i16 [[N:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i16 [[N]], 5
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP1]], 32764
; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16
; CHECK-NEXT:    [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[DOTCAST1:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = or disjoint i16 [[DOTCAST1]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = sext i16 [[OFFSET_IDX]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4]] = add <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[IV]] to i64
; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT:    [[LV_A:%.*]] = load i64, ptr [[GEP_A]], align 4
; CHECK-NEXT:    [[SUM_NEXT]] = add nsw i64 [[SUM]], [[LV_A]]
; CHECK-NEXT:    br label [[LOOP_BB:%.*]]
; CHECK:       loop.bb:
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i16 [[IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[LCSSA_EXIT:%.*]] = phi i64 [ [[SUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i64 [[LCSSA_EXIT]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i16 [ 1, %entry ], [ %iv.next, %loop.latch ]
  ; The backedge value comes via %phi.sum.next rather than %sum.next directly.
  %sum = phi i64 [ 0, %entry ], [ %phi.sum.next, %loop.latch ]
  %gep.A = getelementptr i64, ptr %A, i16 %iv
  %lv.A = load i64, ptr %gep.A
  %sum.next = add nsw i64 %sum, %lv.A
  br label %loop.bb

loop.bb:
  ; Redundant single-incoming phi feeding the reduction backedge.
  %phi.sum.next = phi i64 [ %sum.next, %loop.header ]
  br label %loop.latch

loop.latch:
  %iv.next = add nsw i16 %iv, 1
  %cond = icmp slt i16 %iv.next, %n
  br i1 %cond, label %loop.header, label %exit

exit:
  %lcssa.exit = phi i64 [ %phi.sum.next, %loop.latch ]
  ret i64 %lcssa.exit
}

; Can vectorize reduction with redundant two-operand phi input.
; Same as above, but %loop.latch has two predecessors, so %phi.sum.next has
; two incoming values -- both of them %sum.next, making the phi redundant.
define i64 @reduction_with_phi_with_two_incoming_on_backedge(i16 %n, ptr %A) {
; CHECK-LABEL: define i64 @reduction_with_phi_with_two_incoming_on_backedge(
; CHECK-SAME: i16 [[N:%.*]], ptr [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SMAX:%.*]] = call i16 @llvm.smax.i16(i16 [[N]], i16 2)
; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i16 [[SMAX]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i16 [[TMP0]] to i32
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp slt i16 [[N]], 5
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP1]], 32764
; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc nuw nsw i32 [[N_VEC]] to i16
; CHECK-NEXT:    [[IND_END:%.*]] = or disjoint i16 [[DOTCAST]], 1
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[DOTCAST1:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = or disjoint i16 [[DOTCAST1]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = sext i16 [[OFFSET_IDX]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[TMP4]] = add <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[TMP1]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[LOOP_LATCH]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = sext i16 [[IV]] to i64
; CHECK-NEXT:    [[GEP_A:%.*]] = getelementptr i64, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT:    [[LV_A:%.*]] = load i64, ptr [[GEP_A]], align 4
; CHECK-NEXT:    [[SUM_NEXT]] = add nsw i64 [[SUM]], [[LV_A]]
; CHECK-NEXT:    [[CMP_0:%.*]] = icmp eq i64 [[LV_A]], 29
; CHECK-NEXT:    br i1 [[CMP_0]], label [[LOOP_BB:%.*]], label [[LOOP_LATCH]]
; CHECK:       loop.bb:
; CHECK-NEXT:    br label [[LOOP_LATCH]]
; CHECK:       loop.latch:
; CHECK-NEXT:    [[IV_NEXT]] = add nsw i16 [[IV]], 1
; CHECK-NEXT:    [[COND:%.*]] = icmp slt i16 [[IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[COND]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    [[LCSSA_EXIT:%.*]] = phi i64 [ [[SUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i64 [[LCSSA_EXIT]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i16 [ 1, %entry ], [ %iv.next, %loop.latch ]
  %sum = phi i64 [ 0, %entry ], [ %phi.sum.next, %loop.latch ]
  %gep.A = getelementptr i64, ptr %A, i16 %iv
  %lv.A = load i64, ptr %gep.A
  %sum.next = add nsw i64 %sum, %lv.A
  ; Branch only varies the CFG path; the reduction value is the same either way.
  %cmp.0 = icmp eq i64 %lv.A, 29
  br i1 %cmp.0, label %loop.bb, label %loop.latch

loop.bb:
  br label %loop.latch

loop.latch:
  ; Both incoming values are %sum.next, so this two-operand phi is redundant.
  %phi.sum.next = phi i64 [ %sum.next, %loop.bb ], [ %sum.next, %loop.header ]
  %iv.next = add nsw i16 %iv, 1
  %cond = icmp slt i16 %iv.next, %n
  br i1 %cond, label %loop.header, label %exit

exit:
  %lcssa.exit = phi i64 [ %phi.sum.next, %loop.latch ]
  ret i64 %lcssa.exit
}