1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -dce -instcombine -S | FileCheck %s 3 4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" 5 6define i32 @reduction_sum_single(i32* noalias nocapture %A) { 7; CHECK-LABEL: @reduction_sum_single( 8; CHECK-NEXT: entry: 9; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 10; CHECK: vector.ph: 11; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 12; CHECK: vector.body: 13; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 14; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 15; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ] 16; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 17; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 18; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 19; CHECK: pred.load.if: 20; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 21; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 22; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 23; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 24; CHECK: pred.load.continue: 25; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 26; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 27; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 28; CHECK: pred.load.if1: 29; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1 30; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] 31; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 32; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 1 33; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 34; CHECK: pred.load.continue2: 35; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ] 36; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 37; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 38; CHECK: pred.load.if3: 39; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 40; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]] 41; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 42; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i32 2 43; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 44; CHECK: pred.load.continue4: 45; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ] 46; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 47; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 48; CHECK: pred.load.if5: 49; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 3 50; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] 51; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 52; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i32 3 53; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 54; CHECK: pred.load.continue6: 55; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] 56; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer 57; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) 58; CHECK-NEXT: [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]] 59; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 60; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 61; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 62; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 63; CHECK: middle.block: 64; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 65; CHECK: scalar.ph: 66; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 67; CHECK: .lr.ph: 68; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP2:![0-9]+]] 69; CHECK: ._crit_edge: 70; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ] 71; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 72; 73entry: 74 br label %.lr.ph 75 76.lr.ph: ; preds = %entry, %.lr.ph 77 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 78 %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] 79 %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 80 %l3 = load i32, i32* %l2, align 4 81 %l7 = add i32 %sum.02, %l3 82 %indvars.iv.next = add i64 %indvars.iv, 1 83 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 84 %exitcond = icmp eq i32 %lftr.wideiv, 257 85 br i1 %exitcond, label %._crit_edge, label %.lr.ph 86 87._crit_edge: ; preds = %.lr.ph 88 %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] 89 ret i32 %sum.0.lcssa 90} 91 92define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) { 93; CHECK-LABEL: @reduction_sum( 94; CHECK-NEXT: entry: 95; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 96; CHECK: vector.ph: 97; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 98; CHECK: vector.body: 99; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 100; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 101; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE6]] ] 102; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ] 103; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 104; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 105; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 106; CHECK: pred.load.if: 107; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 108; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 109; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 110; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 111; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 112; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 113; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 114; CHECK: pred.load.continue: 115; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 116; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 117; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 118; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 119; CHECK: pred.load.if1: 120; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 121; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 122; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 123; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 124; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] 125; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 126; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 127; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 128; CHECK: pred.load.continue2: 129; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 130; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 131; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 132; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 133; CHECK: pred.load.if3: 134; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 135; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] 136; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 137; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 138; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] 139; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 140; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 141; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 142; CHECK: pred.load.continue4: 143; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 144; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 145; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 146; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 147; CHECK: pred.load.if5: 148; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 149; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 150; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 151; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 152; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] 153; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 154; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 155; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 156; CHECK: pred.load.continue6: 157; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 158; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 159; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> zeroinitializer 160; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]]) 161; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP41]], [[VEC_PHI]] 162; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> zeroinitializer 163; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]]) 164; CHECK-NEXT: [[TMP45:%.*]] = add i32 [[TMP44]], [[TMP42]] 165; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> zeroinitializer 166; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]]) 167; CHECK-NEXT: [[TMP48]] = add i32 [[TMP47]], [[TMP45]] 168; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 169; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 170; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4> 171; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 172; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 173; CHECK: middle.block: 174; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 175; CHECK: scalar.ph: 176; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 177; CHECK: .lr.ph: 178; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]] 179; CHECK: ._crit_edge: 180; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ] 181; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 182; 183entry: 184 br label %.lr.ph 185 186.lr.ph: ; preds = %entry, %.lr.ph 187 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 188 %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] 189 %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 190 %l3 = load i32, i32* %l2, align 4 191 %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 192 %l5 = load i32, i32* %l4, align 4 193 %l6 = trunc i64 %indvars.iv to i32 194 %l7 = add i32 %sum.02, %l6 195 %l8 = add i32 %l7, %l3 196 %l9 = add i32 %l8, %l5 197 %indvars.iv.next = add i64 %indvars.iv, 1 198 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 199 %exitcond = icmp eq i32 %lftr.wideiv, 257 200 br i1 %exitcond, label %._crit_edge, label %.lr.ph 201 202._crit_edge: ; preds = %.lr.ph 203 %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] 204 ret i32 %sum.0.lcssa 205} 206 207define i32 @reduction_sum_const(i32* noalias nocapture %A) { 208; CHECK-LABEL: @reduction_sum_const( 209; CHECK-NEXT: entry: 210; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 211; CHECK: vector.ph: 212; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 213; CHECK: vector.body: 214; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 215; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 216; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ] 217; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 218; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 219; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 220; CHECK: pred.load.if: 221; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 222; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 223; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 224; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 225; CHECK: pred.load.continue: 226; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 227; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 228; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 229; CHECK: pred.load.if1: 230; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1 231; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] 232; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 233; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 1 234; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 235; CHECK: pred.load.continue2: 236; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ] 237; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 238; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 239; CHECK: pred.load.if3: 240; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 241; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]] 242; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 243; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i32 2 244; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 245; CHECK: pred.load.continue4: 246; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ] 247; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 248; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 249; CHECK: pred.load.if5: 250; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 3 251; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] 252; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 253; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i32 3 254; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 255; CHECK: pred.load.continue6: 256; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] 257; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer 258; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]]) 259; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], [[VEC_PHI]] 260; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> zeroinitializer 261; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]]) 262; CHECK-NEXT: [[TMP29]] = add i32 [[TMP28]], [[TMP26]] 263; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 264; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 265; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 266; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 267; CHECK: middle.block: 268; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 269; CHECK: scalar.ph: 270; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 271; CHECK: .lr.ph: 272; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]] 273; CHECK: ._crit_edge: 274; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ] 275; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 276; 277entry: 278 br label %.lr.ph 279 280.lr.ph: ; preds = %entry, %.lr.ph 281 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 282 %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] 283 %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 284 %l3 = load i32, i32* %l2, align 4 285 %l7 = add i32 %sum.02, %l3 286 %l9 = add i32 %l7, 3 287 %indvars.iv.next = add i64 %indvars.iv, 1 288 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 289 %exitcond = icmp eq i32 %lftr.wideiv, 257 290 br i1 %exitcond, label %._crit_edge, label %.lr.ph 291 292._crit_edge: ; preds = %.lr.ph 293 %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] 294 ret i32 %sum.0.lcssa 295} 296 297define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) { 298; CHECK-LABEL: @reduction_prod( 299; CHECK-NEXT: entry: 300; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 301; CHECK: vector.ph: 302; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 303; CHECK: vector.body: 304; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 305; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 306; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE6]] ] 307; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ] 308; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 309; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 310; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 311; CHECK: pred.load.if: 312; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 313; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 314; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 315; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 316; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 317; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 318; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 319; CHECK: pred.load.continue: 320; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 321; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 322; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 323; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 324; CHECK: pred.load.if1: 325; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 326; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 327; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 328; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 329; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] 330; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 331; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 332; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 333; CHECK: pred.load.continue2: 334; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 335; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 336; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 337; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 338; CHECK: pred.load.if3: 339; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 340; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] 341; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 342; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 343; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] 344; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 345; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 346; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 347; CHECK: pred.load.continue4: 348; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 349; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 350; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 351; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 352; CHECK: pred.load.if5: 353; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 354; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 355; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 356; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 357; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] 358; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 359; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 360; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 361; CHECK: pred.load.continue6: 362; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 363; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 364; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 365; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) 366; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] 367; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 368; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) 369; CHECK-NEXT: [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP42]] 370; CHECK-NEXT: [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 371; CHECK-NEXT: [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]]) 372; CHECK-NEXT: [[TMP48]] = mul i32 [[TMP47]], [[TMP45]] 373; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 374; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 375; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4> 376; CHECK-NEXT: [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 377; CHECK-NEXT: br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 378; CHECK: middle.block: 379; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 380; CHECK: scalar.ph: 381; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 382; CHECK: .lr.ph: 383; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]] 384; CHECK: ._crit_edge: 385; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ] 386; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] 387; 388entry: 389 br label %.lr.ph 390 391.lr.ph: ; preds = %entry, %.lr.ph 392 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 393 %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ] 394 %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 395 %l3 = load i32, i32* %l2, align 4 396 %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 397 %l5 = load i32, i32* %l4, align 4 398 %l6 = trunc i64 %indvars.iv to i32 399 %l7 = mul i32 %prod.02, %l6 400 %l8 = mul i32 %l7, %l3 401 %l9 = mul i32 %l8, %l5 402 %indvars.iv.next = add i64 %indvars.iv, 1 403 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 404 %exitcond = icmp eq i32 %lftr.wideiv, 257 405 br i1 %exitcond, label %._crit_edge, label %.lr.ph 406 407._crit_edge: ; preds = %.lr.ph 408 %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ] 409 ret i32 %prod.0.lcssa 410} 411 412define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B) { 413; CHECK-LABEL: @reduction_mix( 414; CHECK-NEXT: entry: 415; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 416; CHECK: vector.ph: 417; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 418; CHECK: vector.body: 419; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 420; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 421; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE6]] ] 422; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[PRED_LOAD_CONTINUE6]] ] 423; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 424; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 425; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 426; CHECK: pred.load.if: 427; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 428; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 429; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 430; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 431; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 432; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 433; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 434; CHECK: pred.load.continue: 435; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 436; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 437; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 438; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 439; CHECK: pred.load.if1: 440; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 441; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 442; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 443; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 444; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] 445; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 446; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 447; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 448; CHECK: pred.load.continue2: 449; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 450; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 451; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 452; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 453; CHECK: pred.load.if3: 454; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 455; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] 456; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 457; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 458; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] 459; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 460; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 461; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 462; CHECK: pred.load.continue4: 463; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 464; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 465; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 466; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 467; CHECK: pred.load.if5: 468; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 469; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 470; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 471; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 472; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] 473; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 474; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 475; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 476; CHECK: pred.load.continue6: 477; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 478; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 479; CHECK-NEXT: [[TMP40:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP38]] 480; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND7]], <4 x i32> zeroinitializer 481; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP41]]) 482; CHECK-NEXT: [[TMP43:%.*]] = add i32 [[TMP42]], [[VEC_PHI]] 483; CHECK-NEXT: [[TMP44:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer 484; CHECK-NEXT: [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]]) 485; CHECK-NEXT: [[TMP46]] = add i32 [[TMP45]], [[TMP43]] 486; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 487; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 488; CHECK-NEXT: [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], <i32 4, i32 4, i32 4, i32 4> 489; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 490; CHECK-NEXT: br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] 491; CHECK: middle.block: 492; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 493; CHECK: scalar.ph: 494; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 495; CHECK: .lr.ph: 496; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP11:![0-9]+]] 497; CHECK: ._crit_edge: 498; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP46]], [[MIDDLE_BLOCK]] ] 499; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 500; 501entry: 502 br label %.lr.ph 503 504.lr.ph: ; preds = %entry, %.lr.ph 505 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 506 %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] 507 %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 508 %l3 = load i32, i32* %l2, align 4 509 %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 510 %l5 = load i32, i32* %l4, align 4 511 %l6 = mul nsw i32 %l5, %l3 512 %l7 = trunc i64 %indvars.iv to i32 513 %l8 = add i32 %sum.02, %l7 514 %l9 = add i32 %l8, %l6 515 %indvars.iv.next = add i64 %indvars.iv, 1 516 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 517 %exitcond = icmp eq i32 %lftr.wideiv, 257 518 br i1 %exitcond, label %._crit_edge, label %.lr.ph 519 520._crit_edge: ; preds = %.lr.ph 521 %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] 522 ret i32 %sum.0.lcssa 523} 524 525define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B) { 526; CHECK-LABEL: @reduction_mul( 527; CHECK-NEXT: entry: 528; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 529; CHECK: vector.ph: 530; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 531; CHECK: vector.body: 532; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 533; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 534; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] 535; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 536; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 537; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 538; CHECK: pred.load.if: 539; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 540; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 541; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 542; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 543; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 544; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 545; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 546; CHECK: pred.load.continue: 547; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 548; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 549; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 550; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 551; CHECK: pred.load.if1: 552; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 553; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 554; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 555; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 556; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] 557; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 558; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 559; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 560; CHECK: pred.load.continue2: 561; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 562; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 563; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 564; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 565; CHECK: pred.load.if3: 566; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 567; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] 568; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 569; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 570; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] 571; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 572; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 573; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 574; CHECK: pred.load.continue4: 575; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 576; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 577; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 578; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 579; CHECK: pred.load.if5: 580; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 581; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 582; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 583; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 584; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] 585; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 586; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 587; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 588; CHECK: pred.load.continue6: 589; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 590; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 591; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 592; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) 593; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] 594; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> 595; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) 596; CHECK-NEXT: [[TMP45]] = mul i32 [[TMP44]], [[TMP42]] 597; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 598; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 599; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 600; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 601; CHECK: middle.block: 602; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 603; CHECK: scalar.ph: 604; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 605; CHECK: .lr.ph: 606; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP13:![0-9]+]] 607; CHECK: ._crit_edge: 608; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] 609; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 610; 611entry: 612 br label %.lr.ph 613 614.lr.ph: ; preds = %entry, %.lr.ph 615 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 616 %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ] 617 %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 618 %l3 = load i32, i32* %l2, align 4 619 %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 620 %l5 = load i32, i32* %l4, align 4 621 %l6 = mul i32 %sum.02, %l3 622 %l7 = mul i32 %l6, %l5 623 %indvars.iv.next = add i64 %indvars.iv, 1 624 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 625 %exitcond = icmp eq i32 %lftr.wideiv, 257 626 br i1 %exitcond, label %._crit_edge, label %.lr.ph 627 628._crit_edge: ; preds = %.lr.ph 629 %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] 630 ret i32 %sum.0.lcssa 631} 632 633define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) { 634; CHECK-LABEL: @reduction_and( 635; CHECK-NEXT: entry: 636; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 637; CHECK: vector.ph: 638; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 639; CHECK: vector.body: 640; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 641; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 642; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] 643; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 644; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 645; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 646; CHECK: pred.load.if: 647; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 648; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 649; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 650; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 651; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 652; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 653; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 654; CHECK: pred.load.continue: 655; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 656; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 657; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 658; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 659; CHECK: pred.load.if1: 660; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 661; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 662; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 663; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 664; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] 665; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 666; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 667; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 668; CHECK: pred.load.continue2: 669; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 670; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 671; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 672; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 673; CHECK: pred.load.if3: 674; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 675; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] 676; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 677; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 678; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] 679; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 680; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 681; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 682; CHECK: pred.load.continue4: 683; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 684; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 685; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 686; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 687; CHECK: pred.load.if5: 688; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 689; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 690; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 691; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 692; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] 693; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 694; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 695; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 696; CHECK: pred.load.continue6: 697; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 698; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 699; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> 700; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP40]]) 701; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[VEC_PHI]] 702; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> 703; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]]) 704; CHECK-NEXT: [[TMP45]] = and i32 [[TMP44]], [[TMP42]] 705; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 706; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 707; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 708; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 709; CHECK: middle.block: 710; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 711; CHECK: scalar.ph: 712; CHECK-NEXT: br label [[FOR_BODY:%.*]] 713; CHECK: for.body: 714; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 715; CHECK: for.end: 716; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] 717; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 718; 719entry: 720 br label %for.body 721 722for.body: ; preds = %entry, %for.body 723 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 724 %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] 725 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 726 %l0 = load i32, i32* %arrayidx, align 4 727 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 728 %l1 = load i32, i32* %arrayidx2, align 4 729 %add = and i32 %result.08, %l0 730 %and = and i32 %add, %l1 731 %indvars.iv.next = add i64 %indvars.iv, 1 732 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 733 %exitcond = icmp eq i32 %lftr.wideiv, 257 734 br i1 %exitcond, label %for.end, label %for.body 735 736for.end: ; preds = %for.body, %entry 737 %result.0.lcssa = phi i32 [ %and, %for.body ] 738 ret i32 %result.0.lcssa 739} 740 741define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { 742; CHECK-LABEL: @reduction_or( 743; CHECK-NEXT: entry: 744; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 745; CHECK: vector.ph: 746; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 747; CHECK: vector.body: 748; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 749; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 750; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] 751; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 752; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 753; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 754; CHECK: pred.load.if: 755; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 756; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 757; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 758; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 759; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 760; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 761; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 762; CHECK: pred.load.continue: 763; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 764; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 765; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 766; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 767; CHECK: pred.load.if1: 768; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 769; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 770; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 771; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 772; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] 773; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 774; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 775; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 776; CHECK: pred.load.continue2: 777; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 778; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 779; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 780; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 781; CHECK: pred.load.if3: 782; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 783; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] 784; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 785; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 786; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] 787; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 788; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 789; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 790; CHECK: pred.load.continue4: 791; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 792; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 793; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 794; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 795; CHECK: pred.load.if5: 796; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 797; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 798; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 799; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 800; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] 801; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 802; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 803; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 804; CHECK: pred.load.continue6: 805; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 806; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 807; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]] 808; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer 809; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]]) 810; CHECK-NEXT: [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]] 811; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 812; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 813; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 814; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 815; CHECK: middle.block: 816; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 817; CHECK: scalar.ph: 818; CHECK-NEXT: br label [[FOR_BODY:%.*]] 819; CHECK: for.body: 820; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 821; CHECK: for.end: 822; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] 823; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 824; 825entry: 826 br label %for.body 827 828for.body: ; preds = %entry, %for.body 829 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 830 %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] 831 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 832 %l0 = load i32, i32* %arrayidx, align 4 833 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 834 %l1 = load i32, i32* %arrayidx2, align 4 835 %add = add nsw i32 %l1, %l0 836 %or = or i32 %add, %result.08 837 %indvars.iv.next = add i64 %indvars.iv, 1 838 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 839 %exitcond = icmp eq i32 %lftr.wideiv, 257 840 br i1 %exitcond, label %for.end, label %for.body 841 842for.end: ; preds = %for.body, %entry 843 %result.0.lcssa = phi i32 [ %or, %for.body ] 844 ret i32 %result.0.lcssa 845} 846 847define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { 848; CHECK-LABEL: @reduction_xor( 849; CHECK-NEXT: entry: 850; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 851; CHECK: vector.ph: 852; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 853; CHECK: vector.body: 854; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 855; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 856; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] 857; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 858; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 859; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 860; CHECK: pred.load.if: 861; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 862; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 863; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 864; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 865; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 866; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0 867; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 868; CHECK: pred.load.continue: 869; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 870; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 871; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 872; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 873; CHECK: pred.load.if1: 874; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 875; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]] 876; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 877; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i32 1 878; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP11]] 879; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 880; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i32 1 881; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 882; CHECK: pred.load.continue2: 883; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 884; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 885; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 886; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 887; CHECK: pred.load.if3: 888; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 889; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP21]] 890; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 891; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i32 2 892; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] 893; CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 894; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i32 2 895; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 896; CHECK: pred.load.continue4: 897; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 898; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 899; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 900; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 901; CHECK: pred.load.if5: 902; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 903; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]] 904; CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 905; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i32 3 906; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP31]] 907; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 908; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i32 3 909; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 910; CHECK: pred.load.continue6: 911; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 912; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 913; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]] 914; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer 915; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]]) 916; CHECK-NEXT: [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]] 917; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 918; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 919; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 920; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 921; CHECK: middle.block: 922; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 923; CHECK: scalar.ph: 924; CHECK-NEXT: br label [[FOR_BODY:%.*]] 925; CHECK: for.body: 926; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 927; CHECK: for.end: 928; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] 929; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 930; 931entry: 932 br label %for.body 933 934for.body: ; preds = %entry, %for.body 935 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 936 %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] 937 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 938 %l0 = load i32, i32* %arrayidx, align 4 939 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 940 %l1 = load i32, i32* %arrayidx2, align 4 941 %add = add nsw i32 %l1, %l0 942 %xor = xor i32 %add, %result.08 943 %indvars.iv.next = add i64 %indvars.iv, 1 944 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 945 %exitcond = icmp eq i32 %lftr.wideiv, 257 946 br i1 %exitcond, label %for.end, label %for.body 947 948for.end: ; preds = %for.body, %entry 949 %result.0.lcssa = phi i32 [ %xor, %for.body ] 950 ret i32 %result.0.lcssa 951} 952 953define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { 954; CHECK-LABEL: @reduction_fadd( 955; CHECK-NEXT: entry: 956; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 957; CHECK: vector.ph: 958; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 959; CHECK: vector.body: 960; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 961; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 962; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] 963; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 964; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 965; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 966; CHECK: pred.load.if: 967; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 968; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4 969; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0 970; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 971; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4 972; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 973; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 974; CHECK: pred.load.continue: 975; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 976; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 977; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 978; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 979; CHECK: pred.load.if1: 980; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 981; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]] 982; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[TMP12]], align 4 983; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i32 1 984; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP11]] 985; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 4 986; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i32 1 987; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 988; CHECK: pred.load.continue2: 989; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 990; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 991; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 992; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 993; CHECK: pred.load.if3: 994; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 995; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]] 996; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 997; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i32 2 998; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP21]] 999; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP25]], align 4 1000; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i32 2 1001; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1002; CHECK: pred.load.continue4: 1003; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 1004; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 1005; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 1006; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 1007; CHECK: pred.load.if5: 1008; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 1009; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]] 1010; CHECK-NEXT: [[TMP33:%.*]] = load float, float* [[TMP32]], align 4 1011; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i32 3 1012; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP31]] 1013; CHECK-NEXT: [[TMP36:%.*]] = load float, float* [[TMP35]], align 4 1014; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i32 3 1015; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1016; CHECK: pred.load.continue6: 1017; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 1018; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 1019; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer 1020; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP40]]) 1021; CHECK-NEXT: [[TMP42:%.*]] = fadd float [[TMP41]], [[VEC_PHI]] 1022; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer 1023; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP43]]) 1024; CHECK-NEXT: [[TMP45]] = fadd float [[TMP44]], [[TMP42]] 1025; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 1026; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 1027; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 1028; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1029; CHECK: middle.block: 1030; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1031; CHECK: scalar.ph: 1032; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1033; CHECK: for.body: 1034; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 1035; CHECK: for.end: 1036; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] 1037; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] 1038; 1039entry: 1040 br label %for.body 1041 1042for.body: ; preds = %entry, %for.body 1043 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 1044 %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] 1045 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 1046 %l0 = load float, float* %arrayidx, align 4 1047 %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv 1048 %l1 = load float, float* %arrayidx2, align 4 1049 %add = fadd fast float %result.08, %l0 1050 %fadd = fadd fast float %add, %l1 1051 %indvars.iv.next = add i64 %indvars.iv, 1 1052 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1053 %exitcond = icmp eq i32 %lftr.wideiv, 257 1054 br i1 %exitcond, label %for.end, label %for.body 1055 1056for.end: ; preds = %for.body, %entry 1057 %result.0.lcssa = phi float [ %fadd, %for.body ] 1058 ret float %result.0.lcssa 1059} 1060 1061define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { 1062; CHECK-LABEL: @reduction_fmul( 1063; CHECK-NEXT: entry: 1064; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1065; CHECK: vector.ph: 1066; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1067; CHECK: vector.body: 1068; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 1069; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1070; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1071; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 1072; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 1073; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 1074; CHECK: pred.load.if: 1075; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1076; CHECK-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4 1077; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i32 0 1078; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 1079; CHECK-NEXT: [[TMP6:%.*]] = load float, float* [[TMP5]], align 4 1080; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0 1081; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 1082; CHECK: pred.load.continue: 1083; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 1084; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 1085; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 1086; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 1087; CHECK: pred.load.if1: 1088; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[INDEX]], 1 1089; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]] 1090; CHECK-NEXT: [[TMP13:%.*]] = load float, float* [[TMP12]], align 4 1091; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i32 1 1092; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP11]] 1093; CHECK-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 4 1094; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i32 1 1095; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 1096; CHECK: pred.load.continue2: 1097; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 1098; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 1099; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 1100; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 1101; CHECK: pred.load.if3: 1102; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 2 1103; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]] 1104; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 1105; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i32 2 1106; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP21]] 1107; CHECK-NEXT: [[TMP26:%.*]] = load float, float* [[TMP25]], align 4 1108; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i32 2 1109; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1110; CHECK: pred.load.continue4: 1111; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 1112; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 1113; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 1114; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 1115; CHECK: pred.load.if5: 1116; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 3 1117; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]] 1118; CHECK-NEXT: [[TMP33:%.*]] = load float, float* [[TMP32]], align 4 1119; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i32 3 1120; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP31]] 1121; CHECK-NEXT: [[TMP36:%.*]] = load float, float* [[TMP35]], align 4 1122; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i32 3 1123; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1124; CHECK: pred.load.continue6: 1125; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 1126; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 1127; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> 1128; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]]) 1129; CHECK-NEXT: [[TMP42:%.*]] = fmul float [[TMP41]], [[VEC_PHI]] 1130; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> 1131; CHECK-NEXT: [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]]) 1132; CHECK-NEXT: [[TMP45]] = fmul float [[TMP44]], [[TMP42]] 1133; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 1134; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 1135; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 1136; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] 1137; CHECK: middle.block: 1138; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1139; CHECK: scalar.ph: 1140; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1141; CHECK: for.body: 1142; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] 1143; CHECK: for.end: 1144; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] 1145; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] 1146; 1147entry: 1148 br label %for.body 1149 1150for.body: ; preds = %entry, %for.body 1151 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 1152 %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ] 1153 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 1154 %l0 = load float, float* %arrayidx, align 4 1155 %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv 1156 %l1 = load float, float* %arrayidx2, align 4 1157 %add = fmul fast float %result.08, %l0 1158 %fmul = fmul fast float %add, %l1 1159 %indvars.iv.next = add i64 %indvars.iv, 1 1160 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1161 %exitcond = icmp eq i32 %lftr.wideiv, 257 1162 br i1 %exitcond, label %for.end, label %for.body 1163 1164for.end: ; preds = %for.body, %entry 1165 %result.0.lcssa = phi float [ %fmul, %for.body ] 1166 ret float %result.0.lcssa 1167} 1168 1169define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) { 1170; CHECK-LABEL: @reduction_min( 1171; CHECK-NEXT: entry: 1172; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1173; CHECK: vector.ph: 1174; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1175; CHECK: vector.body: 1176; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 1177; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1178; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1179; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 1180; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 1181; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 1182; CHECK: pred.load.if: 1183; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 1184; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 1185; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 1186; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 1187; CHECK: pred.load.continue: 1188; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 1189; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 1190; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 1191; CHECK: pred.load.if1: 1192; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1 1193; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] 1194; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 1195; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 1 1196; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 1197; CHECK: pred.load.continue2: 1198; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ] 1199; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 1200; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 1201; CHECK: pred.load.if3: 1202; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 1203; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]] 1204; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 1205; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i32 2 1206; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1207; CHECK: pred.load.continue4: 1208; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ] 1209; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 1210; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 1211; CHECK: pred.load.if5: 1212; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 3 1213; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] 1214; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 1215; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i32 3 1216; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1217; CHECK: pred.load.continue6: 1218; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] 1219; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> 1220; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]]) 1221; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP25]], [[VEC_PHI]] 1222; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP25]], i32 [[VEC_PHI]] 1223; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 1224; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 1225; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 1226; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] 1227; CHECK: middle.block: 1228; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1229; CHECK: scalar.ph: 1230; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1231; CHECK: for.body: 1232; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] 1233; CHECK: for.end: 1234; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] 1235; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 1236; 1237entry: 1238 br label %for.body 1239 1240for.body: ; preds = %entry, %for.body 1241 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 1242 %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] 1243 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 1244 %l0 = load i32, i32* %arrayidx, align 4 1245 %c0 = icmp slt i32 %result.08, %l0 1246 %v0 = select i1 %c0, i32 %result.08, i32 %l0 1247 %indvars.iv.next = add i64 %indvars.iv, 1 1248 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1249 %exitcond = icmp eq i32 %lftr.wideiv, 257 1250 br i1 %exitcond, label %for.end, label %for.body 1251 1252for.end: ; preds = %for.body, %entry 1253 %result.0.lcssa = phi i32 [ %v0, %for.body ] 1254 ret i32 %result.0.lcssa 1255} 1256 1257define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) { 1258; CHECK-LABEL: @reduction_max( 1259; CHECK-NEXT: entry: 1260; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1261; CHECK: vector.ph: 1262; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1263; CHECK: vector.body: 1264; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 1265; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1266; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1267; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257> 1268; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 1269; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 1270; CHECK: pred.load.if: 1271; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 1272; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 1273; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0 1274; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 1275; CHECK: pred.load.continue: 1276; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 1277; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 1278; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 1279; CHECK: pred.load.if1: 1280; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[INDEX]], 1 1281; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]] 1282; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 1283; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 1 1284; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 1285; CHECK: pred.load.continue2: 1286; CHECK-NEXT: [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ] 1287; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 1288; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 1289; CHECK: pred.load.if3: 1290; CHECK-NEXT: [[TMP13:%.*]] = or i64 [[INDEX]], 2 1291; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]] 1292; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 1293; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i32 2 1294; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1295; CHECK: pred.load.continue4: 1296; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ] 1297; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 1298; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 1299; CHECK: pred.load.if5: 1300; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[INDEX]], 3 1301; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]] 1302; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 1303; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i32 3 1304; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1305; CHECK: pred.load.continue6: 1306; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ] 1307; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer 1308; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]]) 1309; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt i32 [[TMP25]], [[VEC_PHI]] 1310; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP25]], i32 [[VEC_PHI]] 1311; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 1312; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4> 1313; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 1314; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] 1315; CHECK: middle.block: 1316; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1317; CHECK: scalar.ph: 1318; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1319; CHECK: for.body: 1320; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] 1321; CHECK: for.end: 1322; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] 1323; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 1324; 1325entry: 1326 br label %for.body 1327 1328for.body: ; preds = %entry, %for.body 1329 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 1330 %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] 1331 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 1332 %l0 = load i32, i32* %arrayidx, align 4 1333 %c0 = icmp ugt i32 %result.08, %l0 1334 %v0 = select i1 %c0, i32 %result.08, i32 %l0 1335 %indvars.iv.next = add i64 %indvars.iv, 1 1336 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1337 %exitcond = icmp eq i32 %lftr.wideiv, 257 1338 br i1 %exitcond, label %for.end, label %for.body 1339 1340for.end: ; preds = %for.body, %entry 1341 %result.0.lcssa = phi i32 [ %v0, %for.body ] 1342 ret i32 %result.0.lcssa 1343} 1344 1345; Conditional reductions with multi-input phis. 1346define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) { 1347; CHECK-LABEL: @reduction_conditional( 1348; CHECK-NEXT: entry: 1349; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1350; CHECK: vector.ph: 1351; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[S:%.*]], i32 0 1352; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1353; CHECK: vector.body: 1354; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1355; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ] 1356; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 1357; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>* 1358; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4 1359; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 1360; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* 1361; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 1362; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]] 1363; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> 1364; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00> 1365; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]] 1366; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP8]], [[TMP7]] 1367; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true> 1368; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]] 1369; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true> 1370; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]] 1371; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]] 1372; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> [[TMP11]] 1373; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP13]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] 1374; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1375; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 1376; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] 1377; CHECK: middle.block: 1378; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]]) 1379; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1380; CHECK: scalar.ph: 1381; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1382; CHECK: for.body: 1383; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]] 1384; CHECK: if.then: 1385; CHECK-NEXT: br i1 undef, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]] 1386; CHECK: if.then8: 1387; CHECK-NEXT: br label [[FOR_INC]] 1388; CHECK: if.else: 1389; CHECK-NEXT: br i1 undef, label [[IF_THEN16:%.*]], label [[FOR_INC]] 1390; CHECK: if.then16: 1391; CHECK-NEXT: br label [[FOR_INC]] 1392; CHECK: for.inc: 1393; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP29:![0-9]+]] 1394; CHECK: for.end: 1395; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ undef, [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] 1396; CHECK-NEXT: ret float [[SUM_1_LCSSA]] 1397; 1398entry: 1399 br label %for.body 1400 1401for.body: 1402 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] 1403 %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] 1404 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 1405 %l0 = load float, float* %arrayidx, align 4 1406 %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv 1407 %l1 = load float, float* %arrayidx2, align 4 1408 %cmp3 = fcmp ogt float %l0, %l1 1409 br i1 %cmp3, label %if.then, label %for.inc 1410 1411if.then: 1412 %cmp6 = fcmp ogt float %l1, 1.000000e+00 1413 br i1 %cmp6, label %if.then8, label %if.else 1414 1415if.then8: 1416 %add = fadd fast float %sum.033, %l0 1417 br label %for.inc 1418 1419if.else: 1420 %cmp14 = fcmp ogt float %l0, 2.000000e+00 1421 br i1 %cmp14, label %if.then16, label %for.inc 1422 1423if.then16: 1424 %add19 = fadd fast float %sum.033, %l1 1425 br label %for.inc 1426 1427for.inc: 1428 %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ] 1429 %indvars.iv.next = add i64 %indvars.iv, 1 1430 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 1431 %exitcond = icmp ne i32 %lftr.wideiv, 128 1432 br i1 %exitcond, label %for.body, label %for.end 1433 1434for.end: 1435 %sum.1.lcssa = phi float [ %sum.1, %for.inc ] 1436 ret float %sum.1.lcssa 1437} 1438 1439define i8 @reduction_add_trunc(i8* noalias nocapture %A) { 1440; CHECK-LABEL: @reduction_add_trunc( 1441; CHECK-NEXT: entry: 1442; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1443; CHECK: vector.ph: 1444; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1445; CHECK: vector.body: 1446; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 1447; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1448; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1449; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257> 1450; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255> 1451; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 1452; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 1453; CHECK: pred.load.if: 1454; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[INDEX]] to i64 1455; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP3]] 1456; CHECK-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 4 1457; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP5]], i32 0 1458; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 1459; CHECK: pred.load.continue: 1460; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 1461; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 1462; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 1463; CHECK: pred.load.if1: 1464; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[INDEX]], 1 1465; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 1466; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP10]] 1467; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 4 1468; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP12]], i32 1 1469; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 1470; CHECK: pred.load.continue2: 1471; CHECK-NEXT: [[TMP14:%.*]] = phi <4 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] 1472; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 1473; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 1474; CHECK: pred.load.if3: 1475; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[INDEX]], 2 1476; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 1477; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP17]] 1478; CHECK-NEXT: [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 4 1479; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP19]], i32 2 1480; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1481; CHECK: pred.load.continue4: 1482; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x i8> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ] 1483; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 1484; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 1485; CHECK: pred.load.if5: 1486; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[INDEX]], 3 1487; CHECK-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64 1488; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP24]] 1489; CHECK-NEXT: [[TMP26:%.*]] = load i8, i8* [[TMP25]], align 4 1490; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP26]], i32 3 1491; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1492; CHECK: pred.load.continue6: 1493; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ] 1494; CHECK-NEXT: [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32> 1495; CHECK-NEXT: [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]] 1496; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 1497; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> 1498; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 1499; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] 1500; CHECK: middle.block: 1501; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP30]], <4 x i32> [[VEC_PHI]] 1502; CHECK-NEXT: [[TMP33:%.*]] = trunc <4 x i32> [[TMP32]] to <4 x i8> 1503; CHECK-NEXT: [[TMP34:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP33]]) 1504; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 1505; CHECK: scalar.ph: 1506; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 1507; CHECK: .lr.ph: 1508; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP31:![0-9]+]] 1509; CHECK: ._crit_edge: 1510; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ undef, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ] 1511; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]] 1512; 1513entry: 1514 br label %.lr.ph 1515 1516.lr.ph: ; preds = %entry, %.lr.ph 1517 %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 1518 %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ] 1519 %sum.02 = and i32 %sum.02p, 255 1520 %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv 1521 %l3 = load i8, i8* %l2, align 4 1522 %l3e = zext i8 %l3 to i32 1523 %l9 = add i32 %sum.02, %l3e 1524 %indvars.iv.next = add i32 %indvars.iv, 1 1525 %exitcond = icmp eq i32 %indvars.iv.next, 257 1526 br i1 %exitcond, label %._crit_edge, label %.lr.ph 1527 1528._crit_edge: ; preds = %.lr.ph 1529 %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] 1530 %ret = trunc i32 %sum.0.lcssa to i8 1531 ret i8 %ret 1532} 1533 1534 1535define i8 @reduction_and_trunc(i8* noalias nocapture %A) { 1536; CHECK-LABEL: @reduction_and_trunc( 1537; CHECK-NEXT: entry: 1538; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1539; CHECK: vector.ph: 1540; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1541; CHECK: vector.body: 1542; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 1543; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1544; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 255, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[PRED_LOAD_CONTINUE6]] ] 1545; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257> 1546; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> 1547; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP1]]) 1548; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[VEC_PHI]] 1549; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 1550; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 1551; CHECK: pred.load.if: 1552; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[INDEX]] to i64 1553; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP5]] 1554; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 4 1555; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i8> poison, i8 [[TMP7]], i32 0 1556; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 1557; CHECK: pred.load.continue: 1558; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ] 1559; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 1560; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 1561; CHECK: pred.load.if1: 1562; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[INDEX]], 1 1563; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 1564; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP12]] 1565; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[TMP13]], align 4 1566; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i8> [[TMP9]], i8 [[TMP14]], i32 1 1567; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 1568; CHECK: pred.load.continue2: 1569; CHECK-NEXT: [[TMP16:%.*]] = phi <4 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], [[PRED_LOAD_IF1]] ] 1570; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 1571; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 1572; CHECK: pred.load.if3: 1573; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[INDEX]], 2 1574; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP18]] to i64 1575; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP19]] 1576; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP20]], align 4 1577; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP21]], i32 2 1578; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1579; CHECK: pred.load.continue4: 1580; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i8> [ [[TMP16]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP22]], [[PRED_LOAD_IF3]] ] 1581; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 1582; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 1583; CHECK: pred.load.if5: 1584; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[INDEX]], 3 1585; CHECK-NEXT: [[TMP26:%.*]] = sext i32 [[TMP25]] to i64 1586; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP26]] 1587; CHECK-NEXT: [[TMP28:%.*]] = load i8, i8* [[TMP27]], align 4 1588; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i8> [[TMP23]], i8 [[TMP28]], i32 3 1589; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1590; CHECK: pred.load.continue6: 1591; CHECK-NEXT: [[TMP30:%.*]] = phi <4 x i8> [ [[TMP23]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP29]], [[PRED_LOAD_IF5]] ] 1592; CHECK-NEXT: [[TMP31:%.*]] = zext <4 x i8> [[TMP30]] to <4 x i32> 1593; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP31]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> 1594; CHECK-NEXT: [[TMP33:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP32]]) 1595; CHECK-NEXT: [[TMP34]] = and i32 [[TMP33]], [[TMP3]] 1596; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 1597; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> 1598; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260 1599; CHECK-NEXT: br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] 1600; CHECK: middle.block: 1601; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 1602; CHECK: scalar.ph: 1603; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 1604; CHECK: .lr.ph: 1605; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP33:![0-9]+]] 1606; CHECK: ._crit_edge: 1607; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ] 1608; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[SUM_0_LCSSA]] to i8 1609; CHECK-NEXT: ret i8 [[RET]] 1610; 1611entry: 1612 br label %.lr.ph 1613 1614.lr.ph: ; preds = %entry, %.lr.ph 1615 %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 1616 %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ] 1617 %sum.02 = and i32 %sum.02p, 255 1618 %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv 1619 %l3 = load i8, i8* %l2, align 4 1620 %l3e = zext i8 %l3 to i32 1621 %l9 = and i32 %sum.02, %l3e 1622 %indvars.iv.next = add i32 %indvars.iv, 1 1623 %exitcond = icmp eq i32 %indvars.iv.next, 257 1624 br i1 %exitcond, label %._crit_edge, label %.lr.ph 1625 1626._crit_edge: ; preds = %.lr.ph 1627 %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] 1628 %ret = trunc i32 %sum.0.lcssa to i8 1629 ret i8 %ret 1630} 1631