; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck %s

; In-loop integer reductions combined with tail folding. Every loop below
; exits once the truncated induction variable reaches 257, which is not a
; multiple of the forced vector width (4). The expected output therefore
; masks each lane with `icmp ult <iv>, 257`, performs the loads under
; per-lane predication, replaces masked-off lanes with the reduction's
; identity value via `select`, and reduces inside the vector body with
; llvm.vector.reduce.* while the vector loop counts up to 260.
; The assertions are tool-generated; regenerate them with
; utils/update_test_checks.py instead of editing them by hand.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; sum += A[i]: a single add reduction fed by one loaded value.
define i32 @reduction_sum_single(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_sum_single(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT:    [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l7 = add i32 %sum.02, %l3
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; sum += i + A[i] + B[i]: three chained adds on one reduction; the expected
; output contains one masked reduce.add per addend.
define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_sum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
; CHECK:       pred.load.if7:
; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
; CHECK:       pred.load.continue8:
; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]])
; CHECK-NEXT:    [[TMP42:%.*]] = add i32 [[TMP41]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]])
; CHECK-NEXT:    [[TMP45:%.*]] = add i32 [[TMP44]], [[TMP42]]
; CHECK-NEXT:    [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]])
; CHECK-NEXT:    [[TMP48]] = add i32 [[TMP47]], [[TMP45]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = trunc i64 %indvars.iv to i32
  %l7 = add i32 %sum.02, %l6
  %l8 = add i32 %l7, %l3
  %l9 = add i32 %l8, %l5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; sum += A[i] + 3: the constant addend is expected to be reduced as a masked
; splat of 3.
define i32 @reduction_sum_const(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_sum_const(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[TMP25]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 3), <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]])
; CHECK-NEXT:    [[TMP29]] = add i32 [[TMP28]], [[TMP26]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l7 = add i32 %sum.02, %l3
  %l9 = add i32 %l7, 3
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; prod *= i * A[i] * B[i], starting from 1: masked-off lanes are expected to
; be filled with 1 before each reduce.mul.
define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_prod(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
; CHECK:       pred.load.if7:
; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
; CHECK:       pred.load.continue8:
; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1)
; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]])
; CHECK-NEXT:    [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1)
; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
; CHECK-NEXT:    [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP42]]
; CHECK-NEXT:    [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1)
; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
; CHECK-NEXT:    [[TMP48]] = mul i32 [[TMP47]], [[TMP45]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[PROD_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[PROD_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = trunc i64 %indvars.iv to i32
  %l7 = mul i32 %prod.02, %l6
  %l8 = mul i32 %l7, %l3
  %l9 = mul i32 %l8, %l5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %prod.0.lcssa
}

; sum += i + A[i] * B[i]: the multiply is expected to stay a plain vector
; `mul nsw`; only the two adds become in-loop reductions.
define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_mix(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
; CHECK:       pred.load.if7:
; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
; CHECK:       pred.load.continue8:
; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT:    [[TMP40:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP38]]
; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP41]])
; CHECK-NEXT:    [[TMP43:%.*]] = add i32 [[TMP42]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]])
; CHECK-NEXT:    [[TMP46]] = add i32 [[TMP45]], [[TMP43]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
; CHECK-NEXT:    [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP46]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = mul nsw i32 %l5, %l3
  %l7 = trunc i64 %indvars.iv to i32
  %l8 = add i32 %sum.02, %l7
  %l9 = add i32 %l8, %l6
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP36:%.*]] = load
i32, ptr [[TMP35]], align 4 586; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 587; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 588; CHECK: pred.load.continue6: 589; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 590; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 591; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1) 592; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]]) 593; CHECK-NEXT: [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]] 594; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1) 595; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]]) 596; CHECK-NEXT: [[TMP45]] = mul i32 [[TMP44]], [[TMP42]] 597; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 598; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 599; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 600; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] 601; CHECK: middle.block: 602; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] 603; CHECK: scalar.ph: 604; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 605; CHECK: .lr.ph: 606; CHECK-NEXT: br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP13:![0-9]+]] 607; CHECK: ._crit_edge: 608; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] 609; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 610; 611entry: 612 br label %.lr.ph 613 614.lr.ph: ; preds = %entry, %.lr.ph 615 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] 616 %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ] 617 %l2 = getelementptr inbounds i32, ptr %A, i64 
%indvars.iv 618 %l3 = load i32, ptr %l2, align 4 619 %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv 620 %l5 = load i32, ptr %l4, align 4 621 %l6 = mul i32 %sum.02, %l3 622 %l7 = mul i32 %l6, %l5 623 %indvars.iv.next = add i64 %indvars.iv, 1 624 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 625 %exitcond = icmp eq i32 %lftr.wideiv, 257 626 br i1 %exitcond, label %._crit_edge, label %.lr.ph 627 628._crit_edge: ; preds = %.lr.ph 629 %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] 630 ret i32 %sum.0.lcssa 631} 632 633define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) { 634; CHECK-LABEL: @reduction_and( 635; CHECK-NEXT: entry: 636; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 637; CHECK: vector.ph: 638; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 639; CHECK: vector.body: 640; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 641; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 642; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ] 643; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) 644; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 645; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 646; CHECK: pred.load.if: 647; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] 648; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 649; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0 650; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] 651; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 652; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0 653; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 654; CHECK: 
pred.load.continue: 655; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 656; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 657; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 658; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 659; CHECK: pred.load.if1: 660; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 661; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] 662; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 663; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1 664; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]] 665; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 666; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1 667; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 668; CHECK: pred.load.continue2: 669; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 670; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 671; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 672; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 673; CHECK: pred.load.if3: 674; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 675; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] 676; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 677; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2 678; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]] 679; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 680; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> 
[[TMP19]], i32 [[TMP26]], i64 2 681; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 682; CHECK: pred.load.continue4: 683; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 684; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 685; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 686; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 687; CHECK: pred.load.if5: 688; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 689; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]] 690; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 691; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3 692; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]] 693; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 694; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 695; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 696; CHECK: pred.load.continue6: 697; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 698; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 699; CHECK-NEXT: [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 -1) 700; CHECK-NEXT: [[TMP41:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP40]]) 701; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], [[VEC_PHI]] 702; CHECK-NEXT: [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 -1) 703; CHECK-NEXT: [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]]) 704; CHECK-NEXT: [[TMP45]] = and i32 [[TMP44]], [[TMP42]] 705; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 706; 
CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 707; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 708; CHECK-NEXT: br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] 709; CHECK: middle.block: 710; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 711; CHECK: scalar.ph: 712; CHECK-NEXT: br label [[FOR_BODY:%.*]] 713; CHECK: for.body: 714; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] 715; CHECK: for.end: 716; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ] 717; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 718; 719entry: 720 br label %for.body 721 722for.body: ; preds = %entry, %for.body 723 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 724 %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] 725 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 726 %l0 = load i32, ptr %arrayidx, align 4 727 %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv 728 %l1 = load i32, ptr %arrayidx2, align 4 729 %add = and i32 %result.08, %l0 730 %and = and i32 %add, %l1 731 %indvars.iv.next = add i64 %indvars.iv, 1 732 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 733 %exitcond = icmp eq i32 %lftr.wideiv, 257 734 br i1 %exitcond, label %for.end, label %for.body 735 736for.end: ; preds = %for.body 737 %result.0.lcssa = phi i32 [ %and, %for.body ] 738 ret i32 %result.0.lcssa 739} 740 741define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) { 742; CHECK-LABEL: @reduction_or( 743; CHECK-NEXT: entry: 744; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 745; CHECK: vector.ph: 746; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 747; CHECK: vector.body: 748; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 749; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x 
i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 750; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] 751; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) 752; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 753; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 754; CHECK: pred.load.if: 755; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] 756; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 757; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0 758; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] 759; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 760; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0 761; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 762; CHECK: pred.load.continue: 763; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 764; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 765; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 766; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 767; CHECK: pred.load.if1: 768; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 769; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] 770; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 771; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1 772; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]] 773; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 774; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1 775; 
CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 776; CHECK: pred.load.continue2: 777; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 778; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 779; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 780; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 781; CHECK: pred.load.if3: 782; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 783; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] 784; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 785; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2 786; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]] 787; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 788; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2 789; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 790; CHECK: pred.load.continue4: 791; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 792; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 793; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 794; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 795; CHECK: pred.load.if5: 796; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 797; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]] 798; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 799; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3 800; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]] 801; CHECK-NEXT: [[TMP36:%.*]] = load 
i32, ptr [[TMP35]], align 4 802; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 803; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 804; CHECK: pred.load.continue6: 805; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 806; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 807; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]] 808; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer 809; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]]) 810; CHECK-NEXT: [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]] 811; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 812; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 813; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 814; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] 815; CHECK: middle.block: 816; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 817; CHECK: scalar.ph: 818; CHECK-NEXT: br label [[FOR_BODY:%.*]] 819; CHECK: for.body: 820; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] 821; CHECK: for.end: 822; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] 823; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 824; 825entry: 826 br label %for.body 827 828for.body: ; preds = %entry, %for.body 829 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 830 %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] 831 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 832 %l0 = load i32, ptr %arrayidx, align 4 833 %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv 834 %l1 = load i32, ptr %arrayidx2, align 4 835 %add = add 
nsw i32 %l1, %l0 836 %or = or i32 %add, %result.08 837 %indvars.iv.next = add i64 %indvars.iv, 1 838 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 839 %exitcond = icmp eq i32 %lftr.wideiv, 257 840 br i1 %exitcond, label %for.end, label %for.body 841 842for.end: ; preds = %for.body 843 %result.0.lcssa = phi i32 [ %or, %for.body ] 844 ret i32 %result.0.lcssa 845} 846 847define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) { 848; CHECK-LABEL: @reduction_xor( 849; CHECK-NEXT: entry: 850; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 851; CHECK: vector.ph: 852; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 853; CHECK: vector.body: 854; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 855; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 856; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] 857; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257) 858; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 859; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 860; CHECK: pred.load.if: 861; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] 862; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 863; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0 864; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] 865; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 866; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0 867; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 868; CHECK: pred.load.continue: 869; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 870; CHECK-NEXT: 
[[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 871; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 872; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 873; CHECK: pred.load.if1: 874; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 875; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]] 876; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 877; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1 878; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]] 879; CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 880; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1 881; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 882; CHECK: pred.load.continue2: 883; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 884; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 885; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 886; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 887; CHECK: pred.load.if3: 888; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 889; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]] 890; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 891; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2 892; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]] 893; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4 894; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2 895; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 896; CHECK: pred.load.continue4: 897; CHECK-NEXT: 
[[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 898; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 899; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 900; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 901; CHECK: pred.load.if5: 902; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 903; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]] 904; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 905; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3 906; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]] 907; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 908; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3 909; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 910; CHECK: pred.load.continue6: 911; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 912; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 913; CHECK-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]] 914; CHECK-NEXT: [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer 915; CHECK-NEXT: [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]]) 916; CHECK-NEXT: [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]] 917; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 918; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 919; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 920; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] 921; CHECK: middle.block: 922; CHECK-NEXT: br i1 true, label 
[[FOR_END:%.*]], label [[SCALAR_PH]] 923; CHECK: scalar.ph: 924; CHECK-NEXT: br label [[FOR_BODY:%.*]] 925; CHECK: for.body: 926; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] 927; CHECK: for.end: 928; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] 929; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 930; 931entry: 932 br label %for.body 933 934for.body: ; preds = %entry, %for.body 935 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 936 %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] 937 %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv 938 %l0 = load i32, ptr %arrayidx, align 4 939 %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv 940 %l1 = load i32, ptr %arrayidx2, align 4 941 %add = add nsw i32 %l1, %l0 942 %xor = xor i32 %add, %result.08 943 %indvars.iv.next = add i64 %indvars.iv, 1 944 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 945 %exitcond = icmp eq i32 %lftr.wideiv, 257 946 br i1 %exitcond, label %for.end, label %for.body 947 948for.end: ; preds = %for.body 949 %result.0.lcssa = phi i32 [ %xor, %for.body ] 950 ret i32 %result.0.lcssa 951} 952 953define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) { 954; CHECK-LABEL: @reduction_fadd( 955; CHECK-NEXT: entry: 956; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 957; CHECK: vector.ph: 958; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 959; CHECK: vector.body: 960; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 961; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ] 962; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ] 963; CHECK-NEXT: [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 
257) 964; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0 965; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 966; CHECK: pred.load.if: 967; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] 968; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4 969; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0 970; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]] 971; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4 972; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0 973; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 974; CHECK: pred.load.continue: 975; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ] 976; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] 977; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1 978; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 979; CHECK: pred.load.if1: 980; CHECK-NEXT: [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1 981; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]] 982; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4 983; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1 984; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]] 985; CHECK-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 986; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1 987; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 988; CHECK: pred.load.continue2: 989; CHECK-NEXT: [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 990; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ 
[[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ] 991; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2 992; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 993; CHECK: pred.load.if3: 994; CHECK-NEXT: [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2 995; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] 996; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4 997; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2 998; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]] 999; CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4 1000; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2 1001; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 1002; CHECK: pred.load.continue4: 1003; CHECK-NEXT: [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ] 1004; CHECK-NEXT: [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ] 1005; CHECK-NEXT: [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3 1006; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 1007; CHECK: pred.load.if5: 1008; CHECK-NEXT: [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3 1009; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]] 1010; CHECK-NEXT: [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4 1011; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3 1012; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]] 1013; CHECK-NEXT: [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4 1014; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3 1015; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 1016; CHECK: 
pred.load.continue6: 1017; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] 1018; CHECK-NEXT: [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ] 1019; CHECK-NEXT: [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer 1020; CHECK-NEXT: [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP40]]) 1021; CHECK-NEXT: [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer 1022; CHECK-NEXT: [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]]) 1023; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1024; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) 1025; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260 1026; CHECK-NEXT: br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] 1027; CHECK: middle.block: 1028; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] 1029; CHECK: scalar.ph: 1030; CHECK-NEXT: br label [[FOR_BODY:%.*]] 1031; CHECK: for.body: 1032; CHECK-NEXT: br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] 1033; CHECK: for.end: 1034; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ] 1035; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] 1036; 1037entry: 1038 br label %for.body 1039 1040for.body: ; preds = %entry, %for.body 1041 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 1042 %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] 1043 %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv 1044 %l0 = load float, ptr %arrayidx, align 4 1045 %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv 1046 %l1 = load float, ptr %arrayidx2, align 4 1047 
%add = fadd fast float %result.08, %l0
  %fadd = fadd fast float %add, %l1
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi float [ %fadd, %for.body ]
  ret float %result.0.lcssa
}

; Two chained fast fmul reductions (A[i], then B[i]) over 257 iterations.
; 257 is not a multiple of the forced width 4, so the expected output is
; tail-folded: lanes are masked by `icmp ult ..., 257`, the loads are
; predicated, and masked-off lanes are replaced with 1.0 before each in-loop
; llvm.vector.reduce.fmul.
; NOTE(review): the accumulator starts at 0.0, not 1.0; the expected start
; value of the vector-loop accumulator phi mirrors that.
define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_fmul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]]
; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]]
; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> splat (float 1.000000e+00)
; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]])
; CHECK-NEXT:    [[TMP42:%.*]] = fmul fast float [[TMP41]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> splat (float 1.000000e+00)
; CHECK-NEXT:    [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]])
; CHECK-NEXT:    [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
  %l0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
  %l1 = load float, ptr %arrayidx2, align 4
  %add = fmul fast float %result.08, %l0
  %fmul = fmul fast float %add, %l1
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi float [ %fmul, %for.body ]
  ret float %result.0.lcssa
}

; Signed-min reduction (icmp slt + select) over A with start value 1000.
; Masked-off lanes are filled with 2147483647 (the largest signed i32) before
; the in-loop llvm.vector.reduce.smin. %B is not used by the loop.
define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_min(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> splat (i32 2147483647)
; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %c0 = icmp slt i32 %result.08, %l0
  %v0 = select i1 %c0, i32 %result.08, i32 %l0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

; Unsigned-max reduction (icmp ugt + select) over A with start value 1000.
; Masked-off lanes are filled with 0 before the in-loop
; llvm.vector.reduce.umax. %B is not used by the loop.
define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_max(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 3
; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %c0 = icmp ugt i32 %result.08, %l0
  %v0 = select i1 %c0, i32 %result.08, i32 %l0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 257
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

; Conditional reductions with multi-input phis.
; The trip count (128) is a multiple of the forced width 4, so the expected
; output needs no lane mask; it keeps a <4 x float> accumulator in the vector
; loop and performs a single llvm.vector.reduce.fadd in the middle block.
define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
; CHECK-LABEL: @reduction_conditional(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[S:%.*]], i64 0
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
; CHECK-NEXT:    [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]]
; CHECK:       if.then8:
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       if.else:
; CHECK-NEXT:    br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]]
; CHECK:       if.then16:
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       for.inc:
; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[SUM_1_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
  %l0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
  %l1 = load float, ptr %arrayidx2, align 4
  %cmp3 = fcmp ogt float %l0, %l1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:
  %cmp6 = fcmp ogt float %l1, 1.000000e+00
  br i1 %cmp6, label %if.then8, label %if.else

if.then8:
  %add = fadd fast float %sum.033, %l0
  br label %for.inc

if.else:
  %cmp14 = fcmp ogt float %l0, 2.000000e+00
  br i1 %cmp14, label %if.then16, label %for.inc

if.then16:
  %add19 = fadd fast float %sum.033, %l1
  br label %for.inc

for.inc:
  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
  ret float %sum.1.lcssa
}

; i32 add reduction whose accumulator is masked to its low 8 bits on every
; iteration and whose final value is truncated to i8. The expected output
; keeps a <4 x i32> accumulator in the tail-folded vector loop and, in the
; middle block, selects on the lane mask, truncates to <4 x i8> and calls
; llvm.vector.reduce.add.v4i8.
define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_add_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP5]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP9:%.*]] = or disjoint i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4
; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP12]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[TMP14:%.*]] = phi <4 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP16:%.*]] = or disjoint i32 [[INDEX]], 2
; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP17]]
; CHECK-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP19]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP21:%.*]] = phi <4 x i8> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP23:%.*]] = or disjoint i32 [[INDEX]], 3
; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP24]]
; CHECK-NEXT:    [[TMP26:%.*]] = load i8, ptr [[TMP25]], align 4
; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP26]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
; CHECK-NEXT:    [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP30]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT:    [[TMP33:%.*]] = trunc <4 x i32> [[TMP32]] to <4 x i8>
; CHECK-NEXT:    [[TMP34:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP33]])
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i8 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
  %sum.02 = and i32 %sum.02p, 255
  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
  %l3 = load i8, ptr %l2, align 4
  %l3e = zext i8 %l3 to i32
  %l9 = add i32 %sum.02, %l3e
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  %ret = trunc i32 %sum.0.lcssa to i8
  ret i8 %ret
}


; Same shape as the add/trunc case, but the reduction operation is `and`.
; The expected output starts the inactive accumulator lanes at -1 and, in the
; middle block, selects on the lane mask, truncates to <4 x i8> and calls
; llvm.vector.reduce.and.v4i8.
define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_and_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK:       pred.load.if:
; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i8> poison, i8 [[TMP4]], i64 0
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
; CHECK:       pred.load.continue:
; CHECK-NEXT:    [[TMP6:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK:       pred.load.if1:
; CHECK-NEXT:    [[TMP8:%.*]] = or disjoint i32 [[INDEX]], 1
; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 4
; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[TMP11]], i64 1
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
; CHECK:       pred.load.continue2:
; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i8> [ [[TMP6]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK:       pred.load.if3:
; CHECK-NEXT:    [[TMP15:%.*]] = or disjoint i32 [[INDEX]], 2
; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 4
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP18]], i64 2
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
; CHECK:       pred.load.continue4:
; CHECK-NEXT:    [[TMP20:%.*]] = phi <4 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.if5:
; CHECK-NEXT:    [[TMP22:%.*]] = or disjoint i32 [[INDEX]], 3
; CHECK-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP23]]
; CHECK-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 4
; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP25]], i64 3
; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
; CHECK:       pred.load.continue6:
; CHECK-NEXT:    [[TMP27:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT:    [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
; CHECK-NEXT:    [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP31:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP29]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT:    [[TMP32:%.*]] = trunc <4 x i32> [[TMP31]] to <4 x i8>
; CHECK-NEXT:    [[TMP33:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP32]])
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i8 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
  %sum.02 = and i32 %sum.02p, 255
  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
  %l3 = load i8, ptr %l2, align 4
  %l3e = zext i8 %l3 to i32
  %l9 = and i32 %sum.02, %l3e
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 257
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  %ret = trunc i32 %sum.0.lcssa to i8
  ret i8 %ret
}