190f5c8b7SMel Chen; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 2*b3cba9beSMel Chen; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1 3*b3cba9beSMel Chen; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4 4*b3cba9beSMel Chen; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4 5ab9cd27fSMel Chen 6*b3cba9beSMel Chen; About the truncated test cases, the range analysis of the induction variable is 7*b3cba9beSMel Chen; used to ensure the induction variable is always greater than the sentinel 8*b3cba9beSMel Chen; value. The case is vectorizable if the truncated induction variable is 9*b3cba9beSMel Chen; monotonically increasing and never equal to the sentinel. 10ab9cd27fSMel Chendefine i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { 11*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit( 12*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { 13*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 14*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0 15*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 16*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]: 17*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 18*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 19*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 20*b3cba9beSMel Chen; CHECK-VF4IC1: [[VECTOR_PH]]: 21*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 22*b3cba9beSMel Chen; 
CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 23*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] 24*b3cba9beSMel Chen; CHECK-VF4IC1: [[VECTOR_BODY]]: 25*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 26*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] 27*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 28*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 29*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] 30*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 31*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 32*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3) 33*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] 34*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 35*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 36*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 37*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 38*b3cba9beSMel Chen; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: 39*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) 40*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648 41*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: 
[[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331 42*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 43*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] 44*b3cba9beSMel Chen; CHECK-VF4IC1: [[SCALAR_PH]]: 45*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] 46*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] 47*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 48*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 49*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 50*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 51*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 52*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 53*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3 54*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32 55*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]] 56*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 57*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 58*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 59*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]: 60*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 
61*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[EXIT]] 62*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT]]: 63*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 64*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i32 [[RDX_LCSSA]] 65*b3cba9beSMel Chen; 66*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit( 67*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { 68*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 69*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0 70*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 71*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]: 72*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 73*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16 74*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 75*b3cba9beSMel Chen; CHECK-VF4IC4: [[VECTOR_PH]]: 76*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16 77*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 78*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] 79*b3cba9beSMel Chen; CHECK-VF4IC4: [[VECTOR_BODY]]: 80*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 81*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] 82*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] 83*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ 
splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] 84*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] 85*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 86*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 87*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) 88*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) 89*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 90*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] 91*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 92*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 93*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 94*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 95*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 96*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 97*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 98*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 99*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3) 100*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], splat (i64 3) 101*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i64> 
[[WIDE_LOAD5]], splat (i64 3) 102*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], splat (i64 3) 103*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] 104*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]] 105*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] 106*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] 107*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 108*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) 109*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 110*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 111*b3cba9beSMel Chen; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: 112*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) 113*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]]) 114*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]]) 115*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]]) 116*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648 117*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 331 118*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 
119*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] 120*b3cba9beSMel Chen; CHECK-VF4IC4: [[SCALAR_PH]]: 121*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] 122*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] 123*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] 124*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY]]: 125*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 126*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 127*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 128*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 129*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP16]], 3 130*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = trunc i64 [[IV]] to i32 131*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]] 132*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 133*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 134*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 135*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]: 136*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 137*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[EXIT]] 138*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 139*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: 
[[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 140*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i32 [[RDX_LCSSA]] 141*b3cba9beSMel Chen; 142*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit( 143*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { 144*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: 145*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0 146*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 147*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]: 148*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 149*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4 150*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 151*b3cba9beSMel Chen; CHECK-VF1IC4: [[VECTOR_PH]]: 152*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4 153*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]] 154*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] 155*b3cba9beSMel Chen; CHECK-VF1IC4: [[VECTOR_BODY]]: 156*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 157*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] 158*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] 159*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] 160*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ 
[[TMP24:%.*]], %[[VECTOR_BODY]] ] 161*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 162*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 163*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 164*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 165*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32 166*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 0 167*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], 1 168*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i32 [[TMP4]], 2 169*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], 3 170*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] 171*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] 172*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] 173*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] 174*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 175*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 176*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 177*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 178*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sgt i64 [[TMP13]], 3 179*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[TMP14]], 3 180*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp sgt i64 [[TMP15]], 3 181*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp sgt i64 [[TMP16]], 3 182*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]] 183*b3cba9beSMel 
Chen; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]] 184*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]] 185*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]] 186*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 187*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 188*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 189*b3cba9beSMel Chen; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: 190*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]]) 191*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]]) 192*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]]) 193*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648 194*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331 195*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] 196*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] 197*b3cba9beSMel Chen; CHECK-VF1IC4: [[SCALAR_PH]]: 198*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] 199*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] 200*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 201*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 202*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 203*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 204*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 205*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 206*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3 207*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32 208*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]] 209*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 210*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 211*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] 212*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]: 213*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 214*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[EXIT]] 215*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 216*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 217*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i32 [[RDX_LCSSA]] 218ab9cd27fSMel Chen; 219ab9cd27fSMel Chenentry: 220ab9cd27fSMel Chen %cmp.sgt = icmp sgt i32 %n, 0 221ab9cd27fSMel Chen br i1 %cmp.sgt, label %for.body.preheader, label %exit 222ab9cd27fSMel Chen 223ab9cd27fSMel Chenfor.body.preheader: ; preds = %entry 224ab9cd27fSMel Chen %wide.trip.count = zext i32 %n to i64 225ab9cd27fSMel Chen br label %for.body 226ab9cd27fSMel Chen 227ab9cd27fSMel Chenfor.body: ; preds = %for.body.preheader, %for.body 228ab9cd27fSMel Chen %iv = 
phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ] 229ab9cd27fSMel Chen %rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ] 230ab9cd27fSMel Chen %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv 231ab9cd27fSMel Chen %0 = load i64, ptr %arrayidx, align 8 232ab9cd27fSMel Chen %cmp = icmp sgt i64 %0, 3 233ab9cd27fSMel Chen %1 = trunc i64 %iv to i32 234ab9cd27fSMel Chen %spec.select = select i1 %cmp, i32 %1, i32 %rdx 235ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 236ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, %wide.trip.count 237ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 238ab9cd27fSMel Chen 239ab9cd27fSMel Chenexit: ; preds = %for.body, %entry 240ab9cd27fSMel Chen %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] 241ab9cd27fSMel Chen ret i32 %rdx.lcssa 242ab9cd27fSMel Chen} 243ab9cd27fSMel Chen 244*b3cba9beSMel Chen; Without a loop guard, the range analysis can still rely on the constant 245*b3cba9beSMel Chen; trip count. 
246ab9cd27fSMel Chendefine i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { 247*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( 248*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { 249*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 250*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 251*b3cba9beSMel Chen; CHECK-VF4IC1: [[VECTOR_PH]]: 252*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] 253*b3cba9beSMel Chen; CHECK-VF4IC1: [[VECTOR_BODY]]: 254*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 255*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] 256*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 257*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 258*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] 259*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 260*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 261*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3) 262*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] 263*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 264*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 265*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 266*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 267*b3cba9beSMel Chen; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: 268*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) 269*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648 270*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331 271*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] 272*b3cba9beSMel Chen; CHECK-VF4IC1: [[SCALAR_PH]]: 273*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 274*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] 275*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 276*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 277*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 278*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 279*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 280*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 281*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3 282*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32 283*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]] 284*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 285*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000 286*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label 
%[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 287*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT]]: 288*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 289*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 290*b3cba9beSMel Chen; 291*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( 292*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { 293*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 294*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 295*b3cba9beSMel Chen; CHECK-VF4IC4: [[VECTOR_PH]]: 296*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] 297*b3cba9beSMel Chen; CHECK-VF4IC4: [[VECTOR_BODY]]: 298*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 299*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] 300*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] 301*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] 302*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] 303*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 304*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 305*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) 306*b3cba9beSMel Chen; 
CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) 307*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 308*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] 309*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 310*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 311*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 312*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 313*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 314*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 315*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 316*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 317*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3) 318*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], splat (i64 3) 319*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], splat (i64 3) 320*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], splat (i64 3) 321*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] 322*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]] 323*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] 324*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> 
[[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] 325*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 326*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) 327*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 328*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 329*b3cba9beSMel Chen; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: 330*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) 331*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]]) 332*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]]) 333*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]]) 334*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648 335*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 331 336*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] 337*b3cba9beSMel Chen; CHECK-VF4IC4: [[SCALAR_PH]]: 338*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 339*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] 340*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] 341*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY]]: 342*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 343*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 
[ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 344*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 345*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 346*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP16]], 3 347*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = trunc i64 [[IV]] to i32 348*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]] 349*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 350*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000 351*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 352*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 353*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 354*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 355*b3cba9beSMel Chen; 356*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( 357*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { 358*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: 359*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 360*b3cba9beSMel Chen; CHECK-VF1IC4: [[VECTOR_PH]]: 361*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] 362*b3cba9beSMel Chen; CHECK-VF1IC4: [[VECTOR_BODY]]: 363*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 364*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] 365*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, 
%[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] 366*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] 367*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ] 368*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 369*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 370*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 371*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 372*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32 373*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 0 374*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], 1 375*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i32 [[TMP4]], 2 376*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], 3 377*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] 378*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] 379*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] 380*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] 381*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8 382*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8 383*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8 384*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8 385*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sgt i64 [[TMP13]], 3 386*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sgt i64 
[[TMP14]], 3 387*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp sgt i64 [[TMP15]], 3 388*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = icmp sgt i64 [[TMP16]], 3 389*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]] 390*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]] 391*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]] 392*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]] 393*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 394*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000 395*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 396*b3cba9beSMel Chen; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: 397*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]]) 398*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]]) 399*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]]) 400*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648 401*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331 402*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] 403*b3cba9beSMel Chen; CHECK-VF1IC4: [[SCALAR_PH]]: 404*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 405*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ] 406*b3cba9beSMel Chen; 
CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 407*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 408*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 409*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 410*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] 411*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 412*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3 413*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32 414*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]] 415*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 416*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000 417*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 418*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 419*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 420*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 421ab9cd27fSMel Chen; 422ab9cd27fSMel Chenentry: 423ab9cd27fSMel Chen br label %for.body 424ab9cd27fSMel Chen 425ab9cd27fSMel Chenfor.body: ; preds = %entry, %for.body 426ab9cd27fSMel Chen %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ] 427ab9cd27fSMel Chen %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] 428ab9cd27fSMel Chen %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv 429ab9cd27fSMel Chen %0 = load i64, ptr %arrayidx, align 8 430ab9cd27fSMel Chen %cmp = icmp sgt i64 %0, 3 431ab9cd27fSMel Chen %1 = trunc i64 %iv to i32 432ab9cd27fSMel Chen 
%spec.select = select i1 %cmp, i32 %1, i32 %rdx 433ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 434ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, 20000 435ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 436ab9cd27fSMel Chen 437ab9cd27fSMel Chenexit: ; preds = %for.body 438ab9cd27fSMel Chen ret i32 %spec.select 439ab9cd27fSMel Chen} 440ab9cd27fSMel Chen 441ab9cd27fSMel Chen; Without loop guard, the maximum constant trip count that can be vectorized is 442ab9cd27fSMel Chen; the signed maximum value of reduction type. 443ab9cd27fSMel Chendefine i32 @select_fcmp_max_valid_const_ub(ptr %a) { 444*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @select_fcmp_max_valid_const_ub( 445*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { 446*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 447*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 448*b3cba9beSMel Chen; CHECK-VF4IC1: [[VECTOR_PH]]: 449*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] 450*b3cba9beSMel Chen; CHECK-VF4IC1: [[VECTOR_BODY]]: 451*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 452*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] 453*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 454*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 455*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] 456*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 457*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 458*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: 
[[TMP3:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer 459*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] 460*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 461*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 462*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 463*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 464*b3cba9beSMel Chen; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: 465*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]]) 466*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648 467*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 -1 468*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] 469*b3cba9beSMel Chen; CHECK-VF4IC1: [[SCALAR_PH]]: 470*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 471*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ] 472*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 473*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 474*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 475*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 476*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 477*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = load float, ptr 
[[ARRAYIDX]], align 4 478*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP7]], 0.000000e+00 479*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = trunc i64 [[IV]] to i32 480*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]] 481*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 482*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648 483*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 484*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT]]: 485*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 486*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 487*b3cba9beSMel Chen; 488*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub( 489*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { 490*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 491*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 492*b3cba9beSMel Chen; CHECK-VF4IC4: [[VECTOR_PH]]: 493*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] 494*b3cba9beSMel Chen; CHECK-VF4IC4: [[VECTOR_BODY]]: 495*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 496*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ] 497*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] 498*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] 
499*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] 500*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] 501*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) 502*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) 503*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) 504*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 505*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] 506*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 507*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 508*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 509*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 510*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 511*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 512*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 513*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 514*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer 515*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD4]], zeroinitializer 516*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD5]], 
zeroinitializer 517*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD6]], zeroinitializer 518*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] 519*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]] 520*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] 521*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] 522*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 523*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) 524*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 525*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 526*b3cba9beSMel Chen; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: 527*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]]) 528*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]]) 529*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]]) 530*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]]) 531*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648 532*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 -1 533*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] 
534*b3cba9beSMel Chen; CHECK-VF4IC4: [[SCALAR_PH]]: 535*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 536*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ] 537*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] 538*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY]]: 539*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 540*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 541*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 542*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4 543*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP16]], 0.000000e+00 544*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = trunc i64 [[IV]] to i32 545*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]] 546*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 547*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648 548*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 549*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 550*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 551*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 552*b3cba9beSMel Chen; 553*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub( 554*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { 555*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: 
[[ENTRY:.*]]: 556*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] 557*b3cba9beSMel Chen; CHECK-VF1IC4: [[VECTOR_PH]]: 558*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] 559*b3cba9beSMel Chen; CHECK-VF1IC4: [[VECTOR_BODY]]: 560*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 561*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] 562*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] 563*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] 564*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ] 565*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 566*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 567*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 568*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 569*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32 570*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 0 571*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], 1 572*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = add i32 [[TMP4]], 2 573*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i32 [[TMP4]], 3 574*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]] 575*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]] 576*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr 
[[A]], i64 [[TMP2]] 577*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]] 578*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP9]], align 4 579*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP10]], align 4 580*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP11]], align 4 581*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP12]], align 4 582*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = fcmp fast olt float [[TMP13]], 0.000000e+00 583*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = fcmp fast olt float [[TMP14]], 0.000000e+00 584*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = fcmp fast olt float [[TMP15]], 0.000000e+00 585*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP20:%.*]] = fcmp fast olt float [[TMP16]], 0.000000e+00 586*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]] 587*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]] 588*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]] 589*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]] 590*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 591*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 592*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 593*b3cba9beSMel Chen; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: 594*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]]) 595*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]]) 596*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: 
[[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]]) 597*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648 598*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 -1 599*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] 600*b3cba9beSMel Chen; CHECK-VF1IC4: [[SCALAR_PH]]: 601*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] 602*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ] 603*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 604*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 605*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 606*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 607*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 608*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX]], align 4 609*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP26]], 0.000000e+00 610*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = trunc i64 [[IV]] to i32 611*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]] 612*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 613*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648 614*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 615*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 616*b3cba9beSMel Chen; 
CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] 617*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 618ab9cd27fSMel Chen; 619ab9cd27fSMel Chenentry: 620ab9cd27fSMel Chen br label %for.body 621ab9cd27fSMel Chen 622ab9cd27fSMel Chenfor.body: ; preds = %entry, %for.body 623ab9cd27fSMel Chen %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ] 624ab9cd27fSMel Chen %rdx = phi i32 [ -1, %entry ], [ %spec.select, %for.body ] 625ab9cd27fSMel Chen %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 626ab9cd27fSMel Chen %0 = load float, ptr %arrayidx, align 4 627ab9cd27fSMel Chen %cmp = fcmp fast olt float %0, 0.000000e+00 628ab9cd27fSMel Chen %1 = trunc i64 %iv to i32 629ab9cd27fSMel Chen %spec.select = select i1 %cmp, i32 %1, i32 %rdx 630ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 631ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, 2147483648 632ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 633ab9cd27fSMel Chen 634ab9cd27fSMel Chenexit: ; preds = %for.body 635ab9cd27fSMel Chen ret i32 %spec.select 636ab9cd27fSMel Chen} 637ab9cd27fSMel Chen 638ab9cd27fSMel Chen; Negative tests 639ab9cd27fSMel Chen 640ab9cd27fSMel Chen; This test can theoretically be vectorized, but only with a runtime-check. 641ab9cd27fSMel Chen; The construct that is introduced by IndVarSimplify is: 642ab9cd27fSMel Chen; %1 = trunc i64 %iv to i32 643ab9cd27fSMel Chen; However, the loop guard is an i64: 644ab9cd27fSMel Chen; %cmp.sgt = icmp sgt i64 %n, 0 645ab9cd27fSMel Chen; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the 646ab9cd27fSMel Chen; sentinel value), so we need a runtime-check to vectorize this case. 
; Test input: a loop guarded by `icmp sgt i64 %n, 0` that walks the i32 elements of %a and keeps in %rdx (initially 331) the i32-truncated index of the last element greater than 3; it exits when the i64 induction variable reaches %n.
; The CHECK lines for all three prefixes (VF4IC1, VF4IC4, VF1IC4) expect only the scalar for.body loop, with no vector.ph / vector.body blocks, i.e. the loop is left unvectorized.
647ab9cd27fSMel Chendefine i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr %a, i64 %n) { 648*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit( 649*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { 650*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 651*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0 652*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 653*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]: 654*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 655*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 656*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] 657*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] 658*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 659*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 660*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 661*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 662*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] 663*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 664*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] 665*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 666*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]: 667*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 668*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[EXIT]] 669*b3cba9beSMel 
Chen; CHECK-VF4IC1: [[EXIT]]: 670*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 671*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i32 [[RDX_LCSSA]] 672*b3cba9beSMel Chen; 673*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit( 674*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { 675*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 676*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0 677*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 678*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]: 679*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] 680*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY]]: 681*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] 682*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] 683*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 684*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 685*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 686*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 687*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] 688*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 689*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] 690*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 691*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]: 692*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: 
[[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 693*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[EXIT]] 694*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 695*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 696*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i32 [[RDX_LCSSA]] 697*b3cba9beSMel Chen; 698*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit( 699*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) { 700*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: 701*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0 702*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 703*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]: 704*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 705*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 706*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] 707*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] 708*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 709*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 710*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 711*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 712*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] 713*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 714*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] 715*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], 
label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 716*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]: 717*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 718*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[EXIT]] 719*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 720*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 721*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i32 [[RDX_LCSSA]] 722ab9cd27fSMel Chen; 723ab9cd27fSMel Chenentry: 724ab9cd27fSMel Chen %cmp.sgt = icmp sgt i64 %n, 0 725ab9cd27fSMel Chen br i1 %cmp.sgt, label %for.body, label %exit 726ab9cd27fSMel Chen 727ab9cd27fSMel Chenfor.body: ; preds = %entry, %for.body 728ab9cd27fSMel Chen %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ] 729ab9cd27fSMel Chen %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] 730ab9cd27fSMel Chen %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 731ab9cd27fSMel Chen %0 = load i32, ptr %arrayidx, align 4 732ab9cd27fSMel Chen %cmp = icmp sgt i32 %0, 3 733ab9cd27fSMel Chen %1 = trunc i64 %iv to i32 734ab9cd27fSMel Chen %spec.select = select i1 %cmp, i32 %1, i32 %rdx 735ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 736ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, %n 737ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 738ab9cd27fSMel Chen 739ab9cd27fSMel Chenexit: ; preds = %for.body, %entry 740ab9cd27fSMel Chen %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] 741ab9cd27fSMel Chen ret i32 %rdx.lcssa 742ab9cd27fSMel Chen} 743ab9cd27fSMel Chen 744ab9cd27fSMel Chen; This test can theoretically be vectorized, but only with a runtime-check. 
745ab9cd27fSMel Chen; The construct introduced by IndVarSimplify is: 746ab9cd27fSMel Chen; %1 = trunc i64 %iv to i32 747ab9cd27fSMel Chen; However, the loop guard is unsigned (it only rules out %n == 0): 748ab9cd27fSMel Chen; %cmp.not = icmp eq i32 %n, 0 749ab9cd27fSMel Chen; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the 750ab9cd27fSMel Chen; sentinel value), and need a runtime-check to vectorize this case. 751ab9cd27fSMel Chendefine i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { 752*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard( 753*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { 754*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 755*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0 756*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]] 757*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]: 758*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 759*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 760*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 761*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 762*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 763*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 764*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 765*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3 766*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 767*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]]
768*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 769*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 770*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 771*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]: 772*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 773*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[EXIT]] 774*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT]]: 775*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 776*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i32 [[RDX_LCSSA]] 777*b3cba9beSMel Chen; 778*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard( 779*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { 780*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 781*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0 782*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]] 783*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]: 784*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 785*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] 786*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY]]: 787*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 788*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 789*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 790*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
791*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3 792*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 793*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]] 794*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 795*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 796*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 797*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]: 798*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 799*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[EXIT]] 800*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 801*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 802*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i32 [[RDX_LCSSA]] 803*b3cba9beSMel Chen; 804*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard( 805*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { 806*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: 807*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0 808*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]] 809*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]: 810*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 811*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 812*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 813*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 814*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331,
%[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 815*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 816*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 817*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3 818*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 819*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]] 820*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 821*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 822*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 823*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]: 824*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 825*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[EXIT]] 826*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 827*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] 828*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i32 [[RDX_LCSSA]] 829ab9cd27fSMel Chen; 830ab9cd27fSMel Chenentry: 831ab9cd27fSMel Chen %cmp.not = icmp eq i32 %n, 0 832ab9cd27fSMel Chen br i1 %cmp.not, label %exit, label %for.body.preheader 833ab9cd27fSMel Chen 834ab9cd27fSMel Chenfor.body.preheader: ; preds = %entry 835ab9cd27fSMel Chen %wide.trip.count = zext i32 %n to i64 ; zero-extended: any non-zero i32 bit pattern of %n is a valid trip count 836ab9cd27fSMel Chen br label %for.body 837ab9cd27fSMel Chen 838ab9cd27fSMel Chenfor.body: ; preds = %for.body.preheader, %for.body 839ab9cd27fSMel Chen %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ] 840ab9cd27fSMel Chen %rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ] 841ab9cd27fSMel Chen %arrayidx = getelementptr inbounds i32,
ptr %a, i64 %iv 842ab9cd27fSMel Chen %0 = load i32, ptr %arrayidx, align 4 843ab9cd27fSMel Chen %cmp1 = icmp sgt i32 %0, 3 844ab9cd27fSMel Chen %1 = trunc i64 %iv to i32 845ab9cd27fSMel Chen %spec.select = select i1 %cmp1, i32 %1, i32 %rdx 846ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 847ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, %wide.trip.count 848ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 849ab9cd27fSMel Chen 850ab9cd27fSMel Chenexit: ; preds = %for.body, %entry 851ab9cd27fSMel Chen %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] 852ab9cd27fSMel Chen ret i32 %rdx.lcssa 853ab9cd27fSMel Chen} 854ab9cd27fSMel Chen 855ab9cd27fSMel Chen; This test cannot be vectorized, even with a runtime check. 856ab9cd27fSMel Chen; The construct introduced by IndVarSimplify is: 857ab9cd27fSMel Chen; %conv = trunc i64 %iv to i32 858ab9cd27fSMel Chen; However, the loop exit condition is a constant that overflows i32: 859ab9cd27fSMel Chen; %exitcond.not = icmp eq i64 %inc, 4294967294 860ab9cd27fSMel Chen; Hence, the i32 will most certainly wrap and hit the sentinel value, and we 861ab9cd27fSMel Chen; cannot vectorize this case.
862ab9cd27fSMel Chendefine i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) { 863*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound( 864*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { 865*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 866*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 867*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 868*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 869*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 870*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 871*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 872*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 873*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 874*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] 875*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 876*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 877*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 878*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT]]: 879*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 880*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 881*b3cba9beSMel Chen; 882*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound( 883*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { 884*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 885*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] 886*b3cba9beSMel
Chen; CHECK-VF4IC4: [[FOR_BODY]]: 887*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 888*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 889*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 890*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 891*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 892*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 893*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] 894*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 895*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 896*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 897*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 898*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 899*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 900*b3cba9beSMel Chen; 901*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound( 902*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { 903*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: 904*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 905*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 906*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 907*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 908*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
909*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 910*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3 911*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CONV:%.*]] = trunc i64 [[IV]] to i32 912*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]] 913*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 914*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294 915*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 916*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 917*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 918*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 919ab9cd27fSMel Chen; 920ab9cd27fSMel Chenentry: 921ab9cd27fSMel Chen br label %for.body 922ab9cd27fSMel Chen 923ab9cd27fSMel Chenfor.body: ; preds = %entry, %for.body 924ab9cd27fSMel Chen %iv = phi i64 [ 2147483646, %entry ], [ %inc, %for.body ] ; starts at INT32_MAX - 1 925ab9cd27fSMel Chen %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] 926ab9cd27fSMel Chen %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 927ab9cd27fSMel Chen %0 = load i32, ptr %arrayidx, align 4 928ab9cd27fSMel Chen %cmp = icmp sgt i32 %0, 3 929ab9cd27fSMel Chen %conv = trunc i64 %iv to i32 ; keeps only the low 32 bits of %iv 930ab9cd27fSMel Chen %spec.select = select i1 %cmp, i32 %conv, i32 %rdx 931ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 932ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, 4294967294 ; exit bound is UINT32_MAX - 1, far above INT32_MAX 933ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 934ab9cd27fSMel Chen 935ab9cd27fSMel Chenexit: ; preds = %for.body 936ab9cd27fSMel Chen ret i32 %spec.select 937ab9cd27fSMel Chen} 938ab9cd27fSMel Chen 939ab9cd27fSMel Chen; Forbidding vectorization of the FindLastIV pattern involving a truncated 940ab9cd27fSMel Chen; induction variable in the absence of
any loop guard. 941ab9cd27fSMel Chendefine i32 @not_vectorized_select_iv_icmp_no_guard(ptr %a, ptr %b, i32 %start, i32 %n) { 942*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard( 943*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) { 944*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 945*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 946*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 947*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 948*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 949*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] 950*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 951*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 952*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] 953*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 954*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] 955*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i32 956*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]] 957*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 958*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 959*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 960*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT]]: 961*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ] 962*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i32
[[COND_LCSSA]] 963*b3cba9beSMel Chen; 964*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard( 965*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) { 966*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 967*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 968*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] 969*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY]]: 970*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 971*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] 972*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 973*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 974*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] 975*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 976*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] 977*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i32 978*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]] 979*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 980*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 981*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 982*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 983*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ] 984*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i32 [[COND_LCSSA]] 985*b3cba9beSMel Chen; 986*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32
@not_vectorized_select_iv_icmp_no_guard( 987*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) { 988*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: 989*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 990*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 991*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 992*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 993*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] 994*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 995*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 996*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] 997*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 998*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] 999*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i32 1000*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]] 1001*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 1002*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 1003*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 1004*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 1005*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ] 1006*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i32 [[COND_LCSSA]] 1007ab9cd27fSMel Chen; 1008ab9cd27fSMel Chenentry: 1009ab9cd27fSMel Chen %wide.trip.count = zext i32 %n to i64 ; %n is used unchecked: no comparison guards entry to the loop 1010ab9cd27fSMel Chen br label %for.body
1011ab9cd27fSMel Chen 1012ab9cd27fSMel Chenfor.body: ; preds = %entry, %for.body 1013ab9cd27fSMel Chen %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ] 1014ab9cd27fSMel Chen %rdx = phi i32 [ %start, %entry ], [ %cond, %for.body ] 1015ab9cd27fSMel Chen %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 1016ab9cd27fSMel Chen %0 = load i32, ptr %arrayidx, align 4 1017ab9cd27fSMel Chen %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv 1018ab9cd27fSMel Chen %1 = load i32, ptr %arrayidx2, align 4 1019ab9cd27fSMel Chen %cmp = icmp sgt i32 %0, %1 1020ab9cd27fSMel Chen %2 = trunc i64 %iv to i32 1021ab9cd27fSMel Chen %cond = select i1 %cmp, i32 %2, i32 %rdx 1022ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 1023ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, %wide.trip.count 1024ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 1025ab9cd27fSMel Chen 1026ab9cd27fSMel Chenexit: ; preds = %for.body 1027ab9cd27fSMel Chen ret i32 %cond 1028ab9cd27fSMel Chen} 1029ab9cd27fSMel Chen 1030ab9cd27fSMel Chen; Without a loop guard, when the constant trip count exceeds the maximum signed 1031ab9cd27fSMel Chen; value of the reduction type, truncation may cause overflow. Therefore, 1032ab9cd27fSMel Chen; the vectorizer is unable to guarantee that the induction variable is monotonically 1033ab9cd27fSMel Chen; increasing.
1034ab9cd27fSMel Chendefine i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) { 1035*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub( 1036*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { 1037*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 1038*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 1039*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 1040*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 1041*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 1042*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1043*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1044*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00 1045*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 1046*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] 1047*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 1048*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649 1049*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 1050*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT]]: 1051*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 1052*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 1053*b3cba9beSMel Chen; 1054*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub( 1055*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) { 1056*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 1057*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]]
1058*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY]]: 1059*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 1060*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 1061*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] 1062*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1063*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00 1064*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 1065*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] 1066*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 1067*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649 1068*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 1069*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 1070*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 1071*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 1072*b3cba9beSMel Chen; 1073*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub( 1074*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) { 1075*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: 1076*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 1077*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 1078*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 1079*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] 1080*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float,
ptr [[A]], i64 [[IV]] 1081*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4 1082*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00 1083*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = trunc i64 [[IV]] to i32 1084*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]] 1085*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 1086*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649 1087*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]] 1088*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 1089*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ] 1090*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i32 [[SPEC_SELECT_LCSSA]] 1091ab9cd27fSMel Chen; 1092ab9cd27fSMel Chenentry: 1093ab9cd27fSMel Chen br label %for.body 1094ab9cd27fSMel Chen 1095ab9cd27fSMel Chenfor.body: ; preds = %entry, %for.body 1096ab9cd27fSMel Chen %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ] 1097ab9cd27fSMel Chen %rdx = phi i32 [ -1, %entry ], [ %spec.select, %for.body ] 1098ab9cd27fSMel Chen %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv 1099ab9cd27fSMel Chen %0 = load float, ptr %arrayidx, align 4 1100ab9cd27fSMel Chen %cmp = fcmp fast olt float %0, 0.000000e+00 1101ab9cd27fSMel Chen %1 = trunc i64 %iv to i32 1102ab9cd27fSMel Chen %spec.select = select i1 %cmp, i32 %1, i32 %rdx 1103ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 1104ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, 2147483649 ; trip count is INT32_MAX + 2, so the last %iv (2147483648) does not fit in a signed i32 1105ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 1106ab9cd27fSMel Chen 1107ab9cd27fSMel Chenexit: ; preds = %for.body 1108ab9cd27fSMel Chen ret i32 %spec.select 1109ab9cd27fSMel Chen} 1110ab9cd27fSMel Chen 1111ab9cd27fSMel Chen; Even with loop guard protection, if the destination
type of the truncation 1112ab9cd27fSMel Chen; instruction is smaller than the trip count type before extension, overflow 1113ab9cd27fSMel Chen; could still occur. Here the guard only proves %n > 0 as an i32, but the induction variable is truncated to i16, so the truncated value can wrap for large %n. The checks for every tested VF/IC configuration expect the loop to stay scalar (the preheader branches straight to the original loop body). 1114ab9cd27fSMel Chendefine i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(ptr %a, ptr %b, i16 %start, i32 %n) { 1115*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount( 1116*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) { 1117*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ENTRY:.*]]: 1118*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 1119*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 1120*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY_PREHEADER]]: 1121*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 1122*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] 1123*b3cba9beSMel Chen; CHECK-VF4IC1: [[FOR_BODY]]: 1124*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 1125*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] 1126*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1127*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1128*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] 1129*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 1130*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] 1131*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i16 1132*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]] 
1133*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 1134*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 1135*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 1136*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]: 1137*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ] 1138*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: br label %[[EXIT]] 1139*b3cba9beSMel Chen; CHECK-VF4IC1: [[EXIT]]: 1140*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ] 1141*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT: ret i16 [[RDX_0_LCSSA]] 1142*b3cba9beSMel Chen; 1143*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount( 1144*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) { 1145*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ENTRY:.*]]: 1146*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 1147*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 1148*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY_PREHEADER]]: 1149*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 1150*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] 1151*b3cba9beSMel Chen; CHECK-VF4IC4: [[FOR_BODY]]: 1152*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 1153*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] 1154*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1155*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[ARRAYIDX]], align 4 1156*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] 1157*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 1158*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] 1159*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i16 1160*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]] 1161*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 1162*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 1163*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 1164*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]: 1165*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ] 1166*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: br label %[[EXIT]] 1167*b3cba9beSMel Chen; CHECK-VF4IC4: [[EXIT]]: 1168*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ] 1169*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT: ret i16 [[RDX_0_LCSSA]] 1170*b3cba9beSMel Chen; 1171*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount( 1172*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) { 1173*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ENTRY:.*]]: 1174*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N]], 0 1175*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]] 1176*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY_PREHEADER]]: 1177*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 1178*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] 
1179*b3cba9beSMel Chen; CHECK-VF1IC4: [[FOR_BODY]]: 1180*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] 1181*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] 1182*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] 1183*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 1184*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] 1185*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 1186*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]] 1187*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = trunc i64 [[IV]] to i16 1188*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]] 1189*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 1190*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] 1191*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]] 1192*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]: 1193*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ] 1194*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: br label %[[EXIT]] 1195*b3cba9beSMel Chen; CHECK-VF1IC4: [[EXIT]]: 1196*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ] 1197*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT: ret i16 [[RDX_0_LCSSA]] 1198ab9cd27fSMel Chen; 1199ab9cd27fSMel Chenentry: 1200ab9cd27fSMel Chen %cmp9 = icmp sgt i32 %n, 0 ; loop guard: the loop is entered only when %n is positive as a signed i32 1201ab9cd27fSMel Chen br i1 %cmp9, label %for.body.preheader, label %exit 1202ab9cd27fSMel Chen 1203ab9cd27fSMel 
Chenfor.body.preheader: ; preds = %entry 1204ab9cd27fSMel Chen %wide.trip.count = zext i32 %n to i64 ; trip count is an i32 value widened to i64 1205ab9cd27fSMel Chen br label %for.body 1206ab9cd27fSMel Chen 1207ab9cd27fSMel Chenfor.body: ; preds = %for.body.preheader, %for.body 1208ab9cd27fSMel Chen %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ] 1209ab9cd27fSMel Chen %rdx = phi i16 [ %start, %for.body.preheader ], [ %cond, %for.body ] 1210ab9cd27fSMel Chen %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv 1211ab9cd27fSMel Chen %0 = load i32, ptr %arrayidx, align 4 1212ab9cd27fSMel Chen %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv 1213ab9cd27fSMel Chen %1 = load i32, ptr %arrayidx2, align 4 1214ab9cd27fSMel Chen %cmp3 = icmp sgt i32 %0, %1 1215ab9cd27fSMel Chen %2 = trunc i64 %iv to i16 ; i16 is narrower than the i32 trip count before extension, so this value may wrap 1216ab9cd27fSMel Chen %cond = select i1 %cmp3, i16 %2, i16 %rdx ; take the truncated index when a[iv] > b[iv], otherwise keep the running value 1217ab9cd27fSMel Chen %inc = add nuw nsw i64 %iv, 1 1218ab9cd27fSMel Chen %exitcond.not = icmp eq i64 %inc, %wide.trip.count 1219ab9cd27fSMel Chen br i1 %exitcond.not, label %exit, label %for.body 1220ab9cd27fSMel Chen 1221ab9cd27fSMel Chenexit: ; preds = %for.body, %entry 1222ab9cd27fSMel Chen %rdx.0.lcssa = phi i16 [ %start, %entry ], [ %cond, %for.body ] ; %start when the guard skips the loop, otherwise the last selected value 1223ab9cd27fSMel Chen ret i16 %rdx.0.lcssa 1224ab9cd27fSMel Chen} 1225*b3cba9beSMel Chen;. 
1226*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 1227*b3cba9beSMel Chen; CHECK-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 1228*b3cba9beSMel Chen; CHECK-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 1229*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 1230*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 1231*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 1232*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 1233*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 1234*b3cba9beSMel Chen;. 1235*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 1236*b3cba9beSMel Chen; CHECK-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 1237*b3cba9beSMel Chen; CHECK-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 1238*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} 1239*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 1240*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} 1241*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 1242*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} 1243*b3cba9beSMel Chen;. 
1244*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} 1245*b3cba9beSMel Chen; CHECK-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} 1246*b3cba9beSMel Chen; CHECK-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} 1247*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} 1248*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} 1249*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]} 1250*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} 1251*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]} 1252*b3cba9beSMel Chen;. 1253