; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s

; Integer min/max reductions vectorized with a forced vector width of 4,
; interleave count of 1, and in-loop reductions preferred.
;
; Every function below runs the same 256-iteration loop over the i32 array %A,
; starting the reduction at 1000, and differs only in how the min/max step is
; written:
;   * icmp + select                       (reduction_smin, reduction_umax)
;   * icmp + select with swapped operands (*_select_ops_flipped)
;   * a call to the scalar min/max intrinsic (*_intrinsic)
;
; For the icmp + select forms the expected output reduces each loaded vector
; inside vector.body and carries a scalar i32 phi. For the intrinsic forms the
; expected output keeps a <4 x i32> phi in vector.body and reduces it once in
; middle.block.
;
; The %B argument is not used by any of the loops.

; Signed minimum written as "icmp slt" + select: keeps %result.08 when it is
; less than the loaded value, otherwise takes the loaded value.
define i32 @reduction_smin(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: define i32 @reduction_smin
; CHECK-SAME: (ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP1]], i32 [[VEC_PHI]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %c0 = icmp slt i32 %result.08, %l0
  %v0 = select i1 %c0, i32 %result.08, i32 %l0
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test compares the induction variable truncated to i32 against 256.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

; Same "icmp slt" compare as @reduction_smin, but the select operands are
; swapped: the loaded value is chosen when %result.08 is smaller. The loop
; therefore computes a signed maximum, and the expected output uses smax.
define i32 @reduction_smin_select_ops_flipped(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: define i32 @reduction_smin_select_ops_flipped
; CHECK-SAME: (ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP1]], i32 [[VEC_PHI]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %c0 = icmp slt i32 %result.08, %l0
  ; Operands swapped relative to @reduction_smin: picks the larger value.
  %v0 = select i1 %c0, i32 %l0, i32 %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

; Signed minimum written as a call to llvm.smin.i32. The expected output keeps
; a <4 x i32> phi updated with llvm.smin.v4i32 in vector.body and performs a
; single llvm.vector.reduce.smin in middle.block.
define i32 @reduction_smin_intrinsic(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: define i32 @reduction_smin_intrinsic
; CHECK-SAME: (ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %v0 = call i32 @llvm.smin.i32(i32 %result.08, i32 %l0)
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

declare i32 @llvm.smin.i32(i32, i32)

; Unsigned maximum written as "icmp ugt" + select: keeps %result.08 when it is
; greater than the loaded value, otherwise takes the loaded value.
define i32 @reduction_umax(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: define i32 @reduction_umax
; CHECK-SAME: (ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP1]], i32 [[VEC_PHI]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %c0 = icmp ugt i32 %result.08, %l0
  %v0 = select i1 %c0, i32 %result.08, i32 %l0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

; Same "icmp ugt" compare as @reduction_umax, but the select operands are
; swapped: the loaded value is chosen when %result.08 is larger. The loop
; therefore computes an unsigned minimum, and the expected output uses umin.
define i32 @reduction_umax_select_ops_flipped(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: define i32 @reduction_umax_select_ops_flipped
; CHECK-SAME: (ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[VEC_PHI]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %c0 = icmp ugt i32 %result.08, %l0
  ; Operands swapped relative to @reduction_umax: picks the smaller value.
  %v0 = select i1 %c0, i32 %l0, i32 %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

; Unsigned maximum written as a call to llvm.umax.i32. The expected output
; keeps a <4 x i32> phi updated with llvm.umax.v4i32 in vector.body and
; performs a single llvm.vector.reduce.umax in middle.block.
define i32 @reduction_umax_intrinsic(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: define i32 @reduction_umax_intrinsic
; CHECK-SAME: (ptr captures(none) [[A:%.*]], ptr captures(none) [[B:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %v0 = call i32 @llvm.umax.i32(i32 %result.08, i32 %l0)
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %result.0.lcssa = phi i32 [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

declare i32 @llvm.umax.i32(i32, i32)