xref: /llvm-project/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (revision b3cba9be41bfa89bc0ec212706c6028a901e127a)
190f5c8b7SMel Chen; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2*b3cba9beSMel Chen; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC1
3*b3cba9beSMel Chen; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4
4*b3cba9beSMel Chen; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4
5ab9cd27fSMel Chen
6*b3cba9beSMel Chen; For the truncated test cases, range analysis of the induction variable is
7*b3cba9beSMel Chen; used to ensure the induction variable is always greater than the sentinel
8*b3cba9beSMel Chen; value. A case is vectorizable if the truncated induction variable is
9*b3cba9beSMel Chen; monotonically increasing and never equal to the sentinel.
10ab9cd27fSMel Chendefine i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) {
11*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit(
12*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
13*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
14*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
15*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
16*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY_PREHEADER]]:
17*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
18*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
19*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
20*b3cba9beSMel Chen; CHECK-VF4IC1:       [[VECTOR_PH]]:
21*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
22*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
23*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
24*b3cba9beSMel Chen; CHECK-VF4IC1:       [[VECTOR_BODY]]:
25*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
26*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
27*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
28*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
29*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
30*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
31*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
32*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3)
33*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
34*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
35*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
36*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
37*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
38*b3cba9beSMel Chen; CHECK-VF4IC1:       [[MIDDLE_BLOCK]]:
39*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
40*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
41*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331
42*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
43*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
44*b3cba9beSMel Chen; CHECK-VF4IC1:       [[SCALAR_PH]]:
45*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
46*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
47*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
48*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
49*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
50*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
51*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
52*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
53*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3
54*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP8:%.*]] = trunc i64 [[IV]] to i32
55*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]]
56*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
57*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
58*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
59*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT_LOOPEXIT]]:
60*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
61*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[EXIT]]
62*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
63*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
64*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i32 [[RDX_LCSSA]]
65*b3cba9beSMel Chen;
66*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit(
67*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
68*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
69*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
70*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
71*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY_PREHEADER]]:
72*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
73*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 16
74*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
75*b3cba9beSMel Chen; CHECK-VF4IC4:       [[VECTOR_PH]]:
76*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 16
77*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
78*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
79*b3cba9beSMel Chen; CHECK-VF4IC4:       [[VECTOR_BODY]]:
80*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
81*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
82*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
83*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
84*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
85*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
86*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
87*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
88*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
89*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
90*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
91*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
92*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
93*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8
94*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12
95*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
96*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
97*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
98*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
99*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP6:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3)
100*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
101*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP8:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], splat (i64 3)
102*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP9:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], splat (i64 3)
103*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
104*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
105*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
106*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
107*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
108*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
109*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
110*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
111*b3cba9beSMel Chen; CHECK-VF4IC4:       [[MIDDLE_BLOCK]]:
112*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
113*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]])
114*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]])
115*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]])
116*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648
117*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 331
118*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
119*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
120*b3cba9beSMel Chen; CHECK-VF4IC4:       [[SCALAR_PH]]:
121*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
122*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
123*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
124*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
125*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
126*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
127*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
128*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
129*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP16]], 3
130*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP17:%.*]] = trunc i64 [[IV]] to i32
131*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]]
132*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
133*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
134*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
135*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT_LOOPEXIT]]:
136*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
137*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[EXIT]]
138*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
139*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
140*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i32 [[RDX_LCSSA]]
141*b3cba9beSMel Chen;
142*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit(
143*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
144*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
145*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP_SGT:%.*]] = icmp sgt i32 [[N]], 0
146*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
147*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY_PREHEADER]]:
148*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
149*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
150*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
151*b3cba9beSMel Chen; CHECK-VF1IC4:       [[VECTOR_PH]]:
152*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
153*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
154*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
155*b3cba9beSMel Chen; CHECK-VF1IC4:       [[VECTOR_BODY]]:
156*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
157*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
158*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
159*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
160*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
161*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
162*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
163*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
164*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
165*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
166*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 0
167*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP6:%.*]] = add i32 [[TMP4]], 1
168*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP7:%.*]] = add i32 [[TMP4]], 2
169*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP8:%.*]] = add i32 [[TMP4]], 3
170*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
171*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
172*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
173*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
174*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
175*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
176*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
177*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
178*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP17:%.*]] = icmp sgt i64 [[TMP13]], 3
179*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP18:%.*]] = icmp sgt i64 [[TMP14]], 3
180*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP19:%.*]] = icmp sgt i64 [[TMP15]], 3
181*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP20:%.*]] = icmp sgt i64 [[TMP16]], 3
182*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]]
183*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]]
184*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]]
185*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]]
186*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
187*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
188*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
189*b3cba9beSMel Chen; CHECK-VF1IC4:       [[MIDDLE_BLOCK]]:
190*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
191*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
192*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
193*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
194*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331
195*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
196*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]]
197*b3cba9beSMel Chen; CHECK-VF1IC4:       [[SCALAR_PH]]:
198*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
199*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
200*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
201*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
202*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
203*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
204*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
205*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
206*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3
207*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP27:%.*]] = trunc i64 [[IV]] to i32
208*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]]
209*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
210*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
211*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
212*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT_LOOPEXIT]]:
213*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
214*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[EXIT]]
215*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
216*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
217*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i32 [[RDX_LCSSA]]
218ab9cd27fSMel Chen;
219ab9cd27fSMel Chenentry:
220ab9cd27fSMel Chen  %cmp.sgt = icmp sgt i32 %n, 0 ; loop guard: the loop body runs only when %n > 0
221ab9cd27fSMel Chen  br i1 %cmp.sgt, label %for.body.preheader, label %exit
222ab9cd27fSMel Chen
223ab9cd27fSMel Chenfor.body.preheader:                               ; preds = %entry
224ab9cd27fSMel Chen  %wide.trip.count = zext i32 %n to i64 ; trip count is %n zero-extended to i64
225ab9cd27fSMel Chen  br label %for.body
226ab9cd27fSMel Chen
227ab9cd27fSMel Chenfor.body:                                         ; preds = %for.body.preheader, %for.body
228ab9cd27fSMel Chen  %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ] ; i64 induction variable, starts at 0
229ab9cd27fSMel Chen  %rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ] ; running result; stays 331 until an element matches
230ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
231ab9cd27fSMel Chen  %0 = load i64, ptr %arrayidx, align 8
232ab9cd27fSMel Chen  %cmp = icmp sgt i64 %0, 3 ; does a[iv] exceed 3 (signed)?
233ab9cd27fSMel Chen  %1 = trunc i64 %iv to i32 ; the selected value is the IV truncated to i32
234ab9cd27fSMel Chen  %spec.select = select i1 %cmp, i32 %1, i32 %rdx ; take the current index on a match, otherwise keep the previous result
235ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
236ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, %wide.trip.count ; exit test compares against the widened (i64) trip count
237ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
238ab9cd27fSMel Chen
239ab9cd27fSMel Chenexit:                                            ; preds = %for.body, %entry
240ab9cd27fSMel Chen  %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] ; 331 when the guard fails, else the loop's final select
241ab9cd27fSMel Chen  ret i32 %rdx.lcssa
242ab9cd27fSMel Chen}
243ab9cd27fSMel Chen
244*b3cba9beSMel Chen; Without a loop guard, the range analysis can instead rely on the constant
245*b3cba9beSMel Chen; trip count.
246ab9cd27fSMel Chendefine i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) {
247*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit(
248*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
249*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
250*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
251*b3cba9beSMel Chen; CHECK-VF4IC1:       [[VECTOR_PH]]:
252*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
253*b3cba9beSMel Chen; CHECK-VF4IC1:       [[VECTOR_BODY]]:
254*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
255*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
256*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
257*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
258*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
259*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
260*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
261*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP3:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3)
262*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
263*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
264*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
265*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
266*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
267*b3cba9beSMel Chen; CHECK-VF4IC1:       [[MIDDLE_BLOCK]]:
268*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
269*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
270*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 331
271*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
272*b3cba9beSMel Chen; CHECK-VF4IC1:       [[SCALAR_PH]]:
273*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
274*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
275*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
276*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
277*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
278*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
279*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
280*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP7:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
281*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP7]], 3
282*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP8:%.*]] = trunc i64 [[IV]] to i32
283*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]]
284*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
285*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
286*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
287*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
288*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
289*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
290*b3cba9beSMel Chen;
291*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit(
292*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
293*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
294*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
295*b3cba9beSMel Chen; CHECK-VF4IC4:       [[VECTOR_PH]]:
296*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
297*b3cba9beSMel Chen; CHECK-VF4IC4:       [[VECTOR_BODY]]:
298*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
299*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
300*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
301*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
302*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
303*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
304*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
305*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
306*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
307*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
308*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
309*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0
310*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4
311*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8
312*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12
313*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
314*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
315*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
316*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
317*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP6:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], splat (i64 3)
318*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP7:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD4]], splat (i64 3)
319*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP8:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD5]], splat (i64 3)
320*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP9:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD6]], splat (i64 3)
321*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
322*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
323*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
324*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
325*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
326*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
327*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
328*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
329*b3cba9beSMel Chen; CHECK-VF4IC4:       [[MIDDLE_BLOCK]]:
330*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
331*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]])
332*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]])
333*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]])
334*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648
335*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 331
336*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
337*b3cba9beSMel Chen; CHECK-VF4IC4:       [[SCALAR_PH]]:
338*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
339*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
340*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
341*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
342*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
343*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
344*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
345*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP16:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
346*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP16]], 3
347*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP17:%.*]] = trunc i64 [[IV]] to i32
348*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]]
349*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
350*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
351*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
352*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
353*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
354*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
355*b3cba9beSMel Chen;
356*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit(
357*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) {
358*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
359*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
360*b3cba9beSMel Chen; CHECK-VF1IC4:       [[VECTOR_PH]]:
361*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
362*b3cba9beSMel Chen; CHECK-VF1IC4:       [[VECTOR_BODY]]:
363*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
364*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
365*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
366*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
367*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
368*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
369*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
370*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
371*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
372*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
373*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 0
374*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP6:%.*]] = add i32 [[TMP4]], 1
375*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP7:%.*]] = add i32 [[TMP4]], 2
376*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP8:%.*]] = add i32 [[TMP4]], 3
377*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]]
378*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
379*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
380*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
381*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP13:%.*]] = load i64, ptr [[TMP9]], align 8
382*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP10]], align 8
383*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP15:%.*]] = load i64, ptr [[TMP11]], align 8
384*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP16:%.*]] = load i64, ptr [[TMP12]], align 8
385*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP17:%.*]] = icmp sgt i64 [[TMP13]], 3
386*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP18:%.*]] = icmp sgt i64 [[TMP14]], 3
387*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP19:%.*]] = icmp sgt i64 [[TMP15]], 3
388*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP20:%.*]] = icmp sgt i64 [[TMP16]], 3
389*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]]
390*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]]
391*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]]
392*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]]
393*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
394*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20000
395*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
396*b3cba9beSMel Chen; CHECK-VF1IC4:       [[MIDDLE_BLOCK]]:
397*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
398*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
399*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
400*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
401*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 331
402*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
403*b3cba9beSMel Chen; CHECK-VF1IC4:       [[SCALAR_PH]]:
404*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 20000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
405*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[ENTRY]] ]
406*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
407*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
408*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
409*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
410*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
411*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP26:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
412*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[TMP26]], 3
413*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP27:%.*]] = trunc i64 [[IV]] to i32
414*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]]
415*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
416*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 20000
417*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
418*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
419*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
420*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
421ab9cd27fSMel Chen;
422ab9cd27fSMel Chenentry:
423ab9cd27fSMel Chen  br label %for.body
424ab9cd27fSMel Chen
425ab9cd27fSMel Chenfor.body:                                         ; preds = %entry, %for.body
426ab9cd27fSMel Chen  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
427ab9cd27fSMel Chen  %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
428ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
429ab9cd27fSMel Chen  %0 = load i64, ptr %arrayidx, align 8
430ab9cd27fSMel Chen  %cmp = icmp sgt i64 %0, 3
431ab9cd27fSMel Chen  %1 = trunc i64 %iv to i32
432ab9cd27fSMel Chen  %spec.select = select i1 %cmp, i32 %1, i32 %rdx
433ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
434ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, 20000
435ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
436ab9cd27fSMel Chen
437ab9cd27fSMel Chenexit:                                           ; preds = %for.body
438ab9cd27fSMel Chen  ret i32 %spec.select
439ab9cd27fSMel Chen}
440ab9cd27fSMel Chen
441ab9cd27fSMel Chen; Without a loop guard, the maximum constant trip count that can be vectorized is
442ab9cd27fSMel Chen; bounded by the signed maximum value of the reduction type.
; Here %iv starts at 0 and the loop exits once %inc equals 2147483648, so the
; last value fed to the trunc is 2147483647, the largest signed i32 value.
; The checks for all three RUN configurations expect a vector.body loop whose
; reduction phis start at -2147483648, and a middle block that falls back to
; the original start value -1 when the smax result still equals -2147483648.
443ab9cd27fSMel Chendefine i32 @select_fcmp_max_valid_const_ub(ptr %a) {
444*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @select_fcmp_max_valid_const_ub(
445*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
446*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
447*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
448*b3cba9beSMel Chen; CHECK-VF4IC1:       [[VECTOR_PH]]:
449*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
450*b3cba9beSMel Chen; CHECK-VF4IC1:       [[VECTOR_BODY]]:
451*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
452*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
453*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
454*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
455*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
456*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
457*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
458*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP3:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer
459*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
460*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
461*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
462*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
463*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
464*b3cba9beSMel Chen; CHECK-VF4IC1:       [[MIDDLE_BLOCK]]:
465*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP4]])
466*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP6]], -2147483648
467*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP6]], i32 -1
468*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
469*b3cba9beSMel Chen; CHECK-VF4IC1:       [[SCALAR_PH]]:
470*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
471*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
472*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
473*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
474*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
475*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
476*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
477*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4
478*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP7]], 0.000000e+00
479*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP8:%.*]] = trunc i64 [[IV]] to i32
480*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP8]], i32 [[RDX]]
481*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
482*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
483*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
484*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
485*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
486*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
487*b3cba9beSMel Chen;
488*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub(
489*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
490*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
491*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
492*b3cba9beSMel Chen; CHECK-VF4IC4:       [[VECTOR_PH]]:
493*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
494*b3cba9beSMel Chen; CHECK-VF4IC4:       [[VECTOR_BODY]]:
495*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
496*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
497*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
498*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
499*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -2147483648), %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
500*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
501*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
502*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
503*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
504*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
505*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
506*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
507*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4
508*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8
509*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12
510*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
511*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
512*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
513*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
514*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP6:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer
515*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP7:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD4]], zeroinitializer
516*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP8:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
517*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP9:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD6]], zeroinitializer
518*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
519*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]]
520*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]]
521*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]]
522*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
523*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
524*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
525*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
526*b3cba9beSMel Chen; CHECK-VF4IC4:       [[MIDDLE_BLOCK]]:
527*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> [[TMP11]])
528*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP12]])
529*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_MINMAX8:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[RDX_MINMAX7]], <4 x i32> [[TMP13]])
530*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[RDX_MINMAX8]])
531*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[TMP15]], -2147483648
532*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[TMP15]], i32 -1
533*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
534*b3cba9beSMel Chen; CHECK-VF4IC4:       [[SCALAR_PH]]:
535*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
536*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
537*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
538*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
539*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
540*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
541*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
542*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX]], align 4
543*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP16]], 0.000000e+00
544*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP17:%.*]] = trunc i64 [[IV]] to i32
545*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP17]], i32 [[RDX]]
546*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
547*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
548*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
549*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
550*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
551*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
552*b3cba9beSMel Chen;
553*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @select_fcmp_max_valid_const_ub(
554*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) {
555*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
556*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
557*b3cba9beSMel Chen; CHECK-VF1IC4:       [[VECTOR_PH]]:
558*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[VECTOR_BODY:.*]]
559*b3cba9beSMel Chen; CHECK-VF1IC4:       [[VECTOR_BODY]]:
560*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
561*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ]
562*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ]
563*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
564*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ -2147483648, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ]
565*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
566*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
567*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
568*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
569*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDEX]] to i32
570*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 0
571*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP6:%.*]] = add i32 [[TMP4]], 1
572*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP7:%.*]] = add i32 [[TMP4]], 2
573*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP8:%.*]] = add i32 [[TMP4]], 3
574*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
575*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP1]]
576*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
577*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]]
578*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP9]], align 4
579*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP14:%.*]] = load float, ptr [[TMP10]], align 4
580*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP15:%.*]] = load float, ptr [[TMP11]], align 4
581*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP12]], align 4
582*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP17:%.*]] = fcmp fast olt float [[TMP13]], 0.000000e+00
583*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP18:%.*]] = fcmp fast olt float [[TMP14]], 0.000000e+00
584*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP19:%.*]] = fcmp fast olt float [[TMP15]], 0.000000e+00
585*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP20:%.*]] = fcmp fast olt float [[TMP16]], 0.000000e+00
586*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP21]] = select i1 [[TMP17]], i32 [[TMP5]], i32 [[VEC_PHI]]
587*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP22]] = select i1 [[TMP18]], i32 [[TMP6]], i32 [[VEC_PHI1]]
588*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP23]] = select i1 [[TMP19]], i32 [[TMP7]], i32 [[VEC_PHI2]]
589*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP24]] = select i1 [[TMP20]], i32 [[TMP8]], i32 [[VEC_PHI3]]
590*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
591*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648
592*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
593*b3cba9beSMel Chen; CHECK-VF1IC4:       [[MIDDLE_BLOCK]]:
594*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP21]], i32 [[TMP22]])
595*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX4:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX]], i32 [[TMP23]])
596*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_MINMAX5:%.*]] = call i32 @llvm.smax.i32(i32 [[RDX_MINMAX4]], i32 [[TMP24]])
597*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[RDX_MINMAX5]], -2147483648
598*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 [[RDX_MINMAX5]], i32 -1
599*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
600*b3cba9beSMel Chen; CHECK-VF1IC4:       [[SCALAR_PH]]:
601*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 2147483648, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
602*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
603*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
604*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
605*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
606*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
607*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
608*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP26:%.*]] = load float, ptr [[ARRAYIDX]], align 4
609*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP26]], 0.000000e+00
610*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP27:%.*]] = trunc i64 [[IV]] to i32
611*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP27]], i32 [[RDX]]
612*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
613*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483648
614*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
615*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
616*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ]
617*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
618ab9cd27fSMel Chen;
619ab9cd27fSMel Chenentry:
620ab9cd27fSMel Chen  br label %for.body
621ab9cd27fSMel Chen
622ab9cd27fSMel Chenfor.body:                                        ; preds = %entry, %for.body
  ; %iv is the i64 loop counter starting at 0; %rdx carries the running
  ; result of the select reduction and is seeded with -1.
623ab9cd27fSMel Chen  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
624ab9cd27fSMel Chen  %rdx = phi i32 [ -1, %entry ], [ %spec.select, %for.body ]
  ; Load the float at a[%iv] and test it for (ordered) less-than 0.0.
625ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
626ab9cd27fSMel Chen  %0 = load float, ptr %arrayidx, align 4
627ab9cd27fSMel Chen  %cmp = fcmp fast olt float %0, 0.000000e+00
  ; When the compare holds, record the counter truncated to i32; otherwise
  ; keep the previous value of the reduction.
628ab9cd27fSMel Chen  %1 = trunc i64 %iv to i32
629ab9cd27fSMel Chen  %spec.select = select i1 %cmp, i32 %1, i32 %rdx
  ; Constant exit bound: leave the loop once the incremented counter reaches
  ; 2147483648, so the last %iv seen by the trunc above is 2147483647.
630ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
631ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, 2147483648
632ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
633ab9cd27fSMel Chen
634ab9cd27fSMel Chenexit:                                            ; preds = %for.body
635ab9cd27fSMel Chen  ret i32 %spec.select
636ab9cd27fSMel Chen}
637ab9cd27fSMel Chen
638ab9cd27fSMel Chen; Negative tests
639ab9cd27fSMel Chen
640ab9cd27fSMel Chen; This test can theoretically be vectorized, but only with a runtime-check.
641ab9cd27fSMel Chen; The construct that is introduced by IndVarSimplify is:
642ab9cd27fSMel Chen;   %1 = trunc i64 %iv to i32
643ab9cd27fSMel Chen; However, the loop guard is an i64:
644ab9cd27fSMel Chen;   %cmp.sgt = icmp sgt i64 %n, 0
645ab9cd27fSMel Chen; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
646ab9cd27fSMel Chen; sentinel value), and need a runtime-check to vectorize this case.
647ab9cd27fSMel Chendefine i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(ptr %a, i64 %n) {
648*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(
649*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
650*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
651*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0
652*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
653*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY_PREHEADER]]:
654*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
655*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
656*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
657*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
658*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
659*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
660*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
661*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
662*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
663*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
664*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
665*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
666*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT_LOOPEXIT]]:
667*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
668*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[EXIT]]
669*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
670*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
671*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i32 [[RDX_LCSSA]]
672*b3cba9beSMel Chen;
673*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(
674*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
675*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
676*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0
677*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
678*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY_PREHEADER]]:
679*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
680*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
681*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
682*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
683*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
684*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
685*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
686*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
687*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
688*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
689*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
690*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
691*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT_LOOPEXIT]]:
692*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
693*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[EXIT]]
694*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
695*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
696*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i32 [[RDX_LCSSA]]
697*b3cba9beSMel Chen;
698*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unwidened_exit(
699*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
700*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
701*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP_SGT:%.*]] = icmp sgt i64 [[N]], 0
702*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[CMP_SGT]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
703*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY_PREHEADER]]:
704*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
705*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
706*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
707*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ], [ 331, %[[FOR_BODY_PREHEADER]] ]
708*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
709*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
710*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
711*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
712*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
713*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
714*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
715*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
716*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT_LOOPEXIT]]:
717*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
718*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[EXIT]]
719*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
720*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
721*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i32 [[RDX_LCSSA]]
722ab9cd27fSMel Chen;
723ab9cd27fSMel Chenentry:
724ab9cd27fSMel Chen  %cmp.sgt = icmp sgt i64 %n, 0
725ab9cd27fSMel Chen  br i1 %cmp.sgt, label %for.body, label %exit
726ab9cd27fSMel Chen
727ab9cd27fSMel Chenfor.body:                                         ; preds = %entry, %for.body
728ab9cd27fSMel Chen  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
729ab9cd27fSMel Chen  %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
730ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
731ab9cd27fSMel Chen  %0 = load i32, ptr %arrayidx, align 4
732ab9cd27fSMel Chen  %cmp = icmp sgt i32 %0, 3
733ab9cd27fSMel Chen  %1 = trunc i64 %iv to i32
734ab9cd27fSMel Chen  %spec.select = select i1 %cmp, i32 %1, i32 %rdx
735ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
736ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, %n
737ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
738ab9cd27fSMel Chen
739ab9cd27fSMel Chenexit:                                             ; preds = %for.body, %entry
740ab9cd27fSMel Chen  %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ]
741ab9cd27fSMel Chen  ret i32 %rdx.lcssa
742ab9cd27fSMel Chen}
743ab9cd27fSMel Chen
744ab9cd27fSMel Chen; This test can theoretically be vectorized, but only with a runtime check.
745ab9cd27fSMel Chen; The construct introduced by IndVarSimplify is:
746ab9cd27fSMel Chen;   %1 = trunc i64 %iv to i32
747ab9cd27fSMel Chen; However, the loop guard is unsigned (it only rules out %n == 0):
748ab9cd27fSMel Chen;   %cmp.not = icmp eq i32 %n, 0
749ab9cd27fSMel Chen; We cannot guarantee that %iv won't overflow an i32 value (and hence hit the
750ab9cd27fSMel Chen; sentinel value), so a runtime check is needed to vectorize this case.
751ab9cd27fSMel Chendefine i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) {
752*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(
753*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
754*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
755*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0
756*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]]
757*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY_PREHEADER]]:
758*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
759*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
760*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
761*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
762*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
763*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
764*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
765*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3
766*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
767*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]]
768*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
769*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
770*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
771*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT_LOOPEXIT]]:
772*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
773*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[EXIT]]
774*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
775*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
776*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i32 [[RDX_LCSSA]]
777*b3cba9beSMel Chen;
778*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(
779*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
780*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
781*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0
782*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]]
783*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY_PREHEADER]]:
784*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
785*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
786*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
787*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
788*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
789*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
790*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
791*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3
792*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
793*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]]
794*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
795*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
796*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
797*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT_LOOPEXIT]]:
798*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
799*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[EXIT]]
800*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
801*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
802*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i32 [[RDX_LCSSA]]
803*b3cba9beSMel Chen;
804*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_const_truncated_iv_unsigned_loop_guard(
805*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) {
806*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
807*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[N]], 0
808*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY_PREHEADER:.*]]
809*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY_PREHEADER]]:
810*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
811*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
812*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
813*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
814*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ 331, %[[FOR_BODY_PREHEADER]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
815*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
816*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
817*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP0]], 3
818*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
819*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP1]], i32 [[TMP1]], i32 [[RDX]]
820*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
821*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
822*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
823*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT_LOOPEXIT]]:
824*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
825*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[EXIT]]
826*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
827*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ]
828*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i32 [[RDX_LCSSA]]
829ab9cd27fSMel Chen;
830ab9cd27fSMel Chenentry:
831ab9cd27fSMel Chen  %cmp.not = icmp eq i32 %n, 0 ; loop guard: the loop is skipped only when %n == 0
832ab9cd27fSMel Chen  br i1 %cmp.not, label %exit, label %for.body.preheader
833ab9cd27fSMel Chen
834ab9cd27fSMel Chenfor.body.preheader:                               ; preds = %entry
835ab9cd27fSMel Chen  %wide.trip.count = zext i32 %n to i64 ; %n is zero-extended, i.e. read as an unsigned count
836ab9cd27fSMel Chen  br label %for.body
837ab9cd27fSMel Chen
838ab9cd27fSMel Chenfor.body:                                         ; preds = %for.body.preheader, %for.body
839ab9cd27fSMel Chen  %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ] ; 64-bit induction variable, starts at 0
840ab9cd27fSMel Chen  %rdx = phi i32 [ 331, %for.body.preheader ], [ %spec.select, %for.body ] ; running result, 331 until a match is seen
841ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
842ab9cd27fSMel Chen  %0 = load i32, ptr %arrayidx, align 4
843ab9cd27fSMel Chen  %cmp1 = icmp sgt i32 %0, 3 ; match when a[iv] > 3
844ab9cd27fSMel Chen  %1 = trunc i64 %iv to i32 ; only the low 32 bits of %iv are kept
845ab9cd27fSMel Chen  %spec.select = select i1 %cmp1, i32 %1, i32 %rdx ; on a match take the truncated %iv, else keep %rdx
846ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
847ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, %wide.trip.count ; exit once %inc reaches the zero-extended %n
848ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
849ab9cd27fSMel Chen
850ab9cd27fSMel Chenexit:                                             ; preds = %for.body, %entry
851ab9cd27fSMel Chen  %rdx.lcssa = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] ; 331 when the guard skipped the loop
852ab9cd27fSMel Chen  ret i32 %rdx.lcssa
853ab9cd27fSMel Chen}
854ab9cd27fSMel Chen
855ab9cd27fSMel Chen; This test cannot be vectorized, even with a runtime check.
856ab9cd27fSMel Chen; The truncation of the induction variable in the loop is:
857ab9cd27fSMel Chen;   %conv = trunc i64 %iv to i32
858ab9cd27fSMel Chen; However, the loop exit condition is a constant that exceeds the signed i32 range:
859ab9cd27fSMel Chen;   %exitcond.not = icmp eq i64 %inc, 4294967294
860ab9cd27fSMel Chen; Hence, the truncated i32 value will certainly wrap and hit the sentinel value,
861ab9cd27fSMel Chen; and we cannot vectorize this case.
862ab9cd27fSMel Chendefine i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(ptr %a) {
863*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
864*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
865*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
866*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
867*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
868*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
869*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
870*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
871*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
872*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
873*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CONV:%.*]] = trunc i64 [[IV]] to i32
874*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
875*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
876*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
877*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
878*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
879*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
880*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
881*b3cba9beSMel Chen;
882*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
883*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
884*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
885*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
886*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
887*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
888*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
889*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
890*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
891*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
892*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CONV:%.*]] = trunc i64 [[IV]] to i32
893*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
894*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
895*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
896*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
897*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
898*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
899*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
900*b3cba9beSMel Chen;
901*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_icmp_truncated_iv_out_of_bound(
902*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) {
903*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
904*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
905*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
906*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ 2147483646, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
907*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
908*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
909*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
910*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 3
911*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CONV:%.*]] = trunc i64 [[IV]] to i32
912*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[CONV]], i32 [[RDX]]
913*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
914*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 4294967294
915*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
916*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
917*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
918*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
919ab9cd27fSMel Chen;
920ab9cd27fSMel Chenentry:
921ab9cd27fSMel Chen  br label %for.body
922ab9cd27fSMel Chen
923ab9cd27fSMel Chenfor.body:                                         ; preds = %entry, %for.body
924ab9cd27fSMel Chen  %iv = phi i64 [ 2147483646, %entry ], [ %inc, %for.body ] ; starts at 2147483646, i.e. 2^31 - 2
925ab9cd27fSMel Chen  %rdx = phi i32 [ 331, %entry ], [ %spec.select, %for.body ] ; running result, 331 until a match is seen
926ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
927ab9cd27fSMel Chen  %0 = load i32, ptr %arrayidx, align 4
928ab9cd27fSMel Chen  %cmp = icmp sgt i32 %0, 3 ; match when a[iv] > 3
929ab9cd27fSMel Chen  %conv = trunc i64 %iv to i32 ; only the low 32 bits of %iv are kept
930ab9cd27fSMel Chen  %spec.select = select i1 %cmp, i32 %conv, i32 %rdx ; on a match take the truncated %iv, else keep %rdx
931ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
932ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, 4294967294 ; constant bound 2^32 - 2, beyond the signed i32 maximum
933ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
934ab9cd27fSMel Chen
935ab9cd27fSMel Chenexit:                                             ; preds = %for.body
936ab9cd27fSMel Chen  ret i32 %spec.select
937ab9cd27fSMel Chen}
938ab9cd27fSMel Chen
939ab9cd27fSMel Chen; Vectorization of the FindLastIV pattern involving a truncated induction
940ab9cd27fSMel Chen; variable is forbidden in the absence of any loop guard.
941ab9cd27fSMel Chendefine i32 @not_vectorized_select_iv_icmp_no_guard(ptr %a, ptr %b, i32 %start, i32 %n) {
942*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard(
943*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) {
944*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
945*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
946*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
947*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
948*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
949*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
950*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
951*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
952*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
953*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
954*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
955*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = trunc i64 [[IV]] to i32
956*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]]
957*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
958*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
959*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
960*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
961*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
962*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i32 [[COND_LCSSA]]
963*b3cba9beSMel Chen;
964*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard(
965*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) {
966*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
967*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
968*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
969*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
970*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
971*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
972*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
973*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
974*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
975*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
976*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
977*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = trunc i64 [[IV]] to i32
978*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]]
979*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
980*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
981*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
982*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
983*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
984*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i32 [[COND_LCSSA]]
985*b3cba9beSMel Chen;
986*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_iv_icmp_no_guard(
987*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[START:%.*]], i32 [[N:%.*]]) {
988*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
989*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
990*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
991*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
992*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
993*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
994*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
995*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
996*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
997*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
998*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
999*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP2:%.*]] = trunc i64 [[IV]] to i32
1000*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[COND]] = select i1 [[CMP]], i32 [[TMP2]], i32 [[RDX]]
1001*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
1002*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
1003*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1004*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
1005*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
1006*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i32 [[COND_LCSSA]]
1007ab9cd27fSMel Chen;
1008ab9cd27fSMel Chenentry:
1009ab9cd27fSMel Chen  %wide.trip.count = zext i32 %n to i64 ; %n is zero-extended; nothing checks it before the loop is entered
1010ab9cd27fSMel Chen  br label %for.body
1011ab9cd27fSMel Chen
1012ab9cd27fSMel Chenfor.body:                                         ; preds = %entry, %for.body
1013ab9cd27fSMel Chen  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ] ; 64-bit induction variable, starts at 0
1014ab9cd27fSMel Chen  %rdx = phi i32 [ %start, %entry ], [ %cond, %for.body ] ; running result, %start until a match is seen
1015ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
1016ab9cd27fSMel Chen  %0 = load i32, ptr %arrayidx, align 4
1017ab9cd27fSMel Chen  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
1018ab9cd27fSMel Chen  %1 = load i32, ptr %arrayidx2, align 4
1019ab9cd27fSMel Chen  %cmp = icmp sgt i32 %0, %1 ; match when a[iv] > b[iv]
1020ab9cd27fSMel Chen  %2 = trunc i64 %iv to i32 ; only the low 32 bits of %iv are kept
1021ab9cd27fSMel Chen  %cond = select i1 %cmp, i32 %2, i32 %rdx ; on a match take the truncated %iv, else keep %rdx
1022ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
1023ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, %wide.trip.count ; exit once %inc reaches the zero-extended %n
1024ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
1025ab9cd27fSMel Chen
1026ab9cd27fSMel Chenexit:                                             ; preds = %for.body
1027ab9cd27fSMel Chen  ret i32 %cond
1028ab9cd27fSMel Chen}
1029ab9cd27fSMel Chen
1030ab9cd27fSMel Chen; Without a loop guard, when the constant trip count exceeds the maximum signed
1031ab9cd27fSMel Chen; value of the reduction type, truncation may cause overflow. Therefore, the
1032ab9cd27fSMel Chen; vectorizer is unable to guarantee that the induction variable is monotonically
1033ab9cd27fSMel Chen; increasing.
1034ab9cd27fSMel Chendefine i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) {
1035*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub(
1036*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) {
1037*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
1038*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
1039*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
1040*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1041*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
1042*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1043*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1044*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
1045*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
1046*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
1047*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
1048*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649
1049*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1050*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
1051*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
1052*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
1053*b3cba9beSMel Chen;
1054*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub(
1055*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]]) {
1056*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
1057*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
1058*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
1059*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1060*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
1061*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1062*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1063*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
1064*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
1065*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
1066*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
1067*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649
1068*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1069*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
1070*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
1071*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
1072*b3cba9beSMel Chen;
1073*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i32 @not_vectorized_select_fcmp_invalid_const_ub(
1074*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]]) {
1075*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
1076*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
1077*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
1078*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1079*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i32 [ -1, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
1080*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
1081*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1082*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00
1083*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = trunc i64 [[IV]] to i32
1084*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP]], i32 [[TMP1]], i32 [[RDX]]
1085*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
1086*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 2147483649
1087*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1088*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
1089*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
1090*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i32 [[SPEC_SELECT_LCSSA]]
1091ab9cd27fSMel Chen;
; Negative test: the autogenerated assertions above expect this loop to come
; out of the pass in its original scalar form (entry / for.body / exit only)
; for every RUN configuration.
;
; The loop scans the float array %a and returns the i32-truncated index of the
; last element that compares below 0.0, or -1 if no element does.
1092ab9cd27fSMel Chenentry:
1093ab9cd27fSMel Chen  br label %for.body
1094ab9cd27fSMel Chen
1095ab9cd27fSMel Chenfor.body:                                        ; preds = %entry, %for.body
1096ab9cd27fSMel Chen  %iv = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  ; Running result of the select pattern; -1 is the initial value.
1097ab9cd27fSMel Chen  %rdx = phi i32 [ -1, %entry ], [ %spec.select, %for.body ]
1098ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
1099ab9cd27fSMel Chen  %0 = load float, ptr %arrayidx, align 4
1100ab9cd27fSMel Chen  %cmp = fcmp fast olt float %0, 0.000000e+00
  ; %iv runs from 0 up to and including 2147483648 (2^31), because the loop
  ; only exits once %inc equals 2147483649. That final value does not fit in a
  ; signed i32, so the truncated index wraps to INT32_MIN on the last iteration.
1101ab9cd27fSMel Chen  %1 = trunc i64 %iv to i32
  ; Take the truncated index when the element is below 0.0; otherwise carry the
  ; previous result forward.
1102ab9cd27fSMel Chen  %spec.select = select i1 %cmp, i32 %1, i32 %rdx
1103ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
  ; Constant exit bound 2147483649 == 2^31 + 1.
1104ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, 2147483649
1105ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
1106ab9cd27fSMel Chen
1107ab9cd27fSMel Chenexit:                                            ; preds = %for.body
1108ab9cd27fSMel Chen  ret i32 %spec.select
1109ab9cd27fSMel Chen}
1110ab9cd27fSMel Chen
1111ab9cd27fSMel Chen; Even with loop guard protection, if the destination type of the truncation
1112ab9cd27fSMel Chen; instruction is smaller than the trip count type before extension, overflow
1113ab9cd27fSMel Chen; could still occur.
1114ab9cd27fSMel Chendefine i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(ptr %a, ptr %b, i16 %start, i32 %n) {
1115*b3cba9beSMel Chen; CHECK-VF4IC1-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(
1116*b3cba9beSMel Chen; CHECK-VF4IC1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) {
1117*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:  [[ENTRY:.*]]:
1118*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
1119*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
1120*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY_PREHEADER]]:
1121*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1122*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[FOR_BODY:.*]]
1123*b3cba9beSMel Chen; CHECK-VF4IC1:       [[FOR_BODY]]:
1124*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1125*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1126*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
1127*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1128*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
1129*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
1130*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
1131*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[TMP2:%.*]] = trunc i64 [[IV]] to i16
1132*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]]
1133*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
1134*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
1135*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
1136*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT_LOOPEXIT]]:
1137*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ]
1138*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    br label %[[EXIT]]
1139*b3cba9beSMel Chen; CHECK-VF4IC1:       [[EXIT]]:
1140*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
1141*b3cba9beSMel Chen; CHECK-VF4IC1-NEXT:    ret i16 [[RDX_0_LCSSA]]
1142*b3cba9beSMel Chen;
1143*b3cba9beSMel Chen; CHECK-VF4IC4-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(
1144*b3cba9beSMel Chen; CHECK-VF4IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) {
1145*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:  [[ENTRY:.*]]:
1146*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
1147*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
1148*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY_PREHEADER]]:
1149*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1150*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[FOR_BODY:.*]]
1151*b3cba9beSMel Chen; CHECK-VF4IC4:       [[FOR_BODY]]:
1152*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1153*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1154*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
1155*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1156*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
1157*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
1158*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
1159*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[TMP2:%.*]] = trunc i64 [[IV]] to i16
1160*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]]
1161*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
1162*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
1163*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
1164*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT_LOOPEXIT]]:
1165*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ]
1166*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    br label %[[EXIT]]
1167*b3cba9beSMel Chen; CHECK-VF4IC4:       [[EXIT]]:
1168*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
1169*b3cba9beSMel Chen; CHECK-VF4IC4-NEXT:    ret i16 [[RDX_0_LCSSA]]
1170*b3cba9beSMel Chen;
1171*b3cba9beSMel Chen; CHECK-VF1IC4-LABEL: define i16 @not_vectorized_select_iv_icmp_overflow_unwidened_tripcount(
1172*b3cba9beSMel Chen; CHECK-VF1IC4-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i16 [[START:%.*]], i32 [[N:%.*]]) {
1173*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:  [[ENTRY:.*]]:
1174*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
1175*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[EXIT:.*]]
1176*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY_PREHEADER]]:
1177*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1178*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[FOR_BODY:.*]]
1179*b3cba9beSMel Chen; CHECK-VF1IC4:       [[FOR_BODY]]:
1180*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
1181*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX:%.*]] = phi i16 [ [[START]], %[[FOR_BODY_PREHEADER]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1182*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
1183*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
1184*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
1185*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
1186*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[CMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
1187*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[TMP2:%.*]] = trunc i64 [[IV]] to i16
1188*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[COND]] = select i1 [[CMP3]], i16 [[TMP2]], i16 [[RDX]]
1189*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[INC]] = add nuw nsw i64 [[IV]], 1
1190*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]]
1191*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]]
1192*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT_LOOPEXIT]]:
1193*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[COND_LCSSA:%.*]] = phi i16 [ [[COND]], %[[FOR_BODY]] ]
1194*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    br label %[[EXIT]]
1195*b3cba9beSMel Chen; CHECK-VF1IC4:       [[EXIT]]:
1196*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    [[RDX_0_LCSSA:%.*]] = phi i16 [ [[START]], %[[ENTRY]] ], [ [[COND_LCSSA]], %[[EXIT_LOOPEXIT]] ]
1197*b3cba9beSMel Chen; CHECK-VF1IC4-NEXT:    ret i16 [[RDX_0_LCSSA]]
1198ab9cd27fSMel Chen;
; Negative test: the autogenerated assertions above expect the scalar loop to
; survive the pass for every RUN configuration; the only differences from the
; input are a dedicated loop-exit block and its LCSSA phi.
;
; The loop compares a[iv] with b[iv] and returns the i16-truncated index of the
; last position where a[iv] > b[iv] (signed), or %start if there is none.
1199ab9cd27fSMel Chenentry:
  ; Loop guard: the loop is entered only when %n is in [1, 2^31 - 1].
1200ab9cd27fSMel Chen  %cmp9 = icmp sgt i32 %n, 0
1201ab9cd27fSMel Chen  br i1 %cmp9, label %for.body.preheader, label %exit
1202ab9cd27fSMel Chen
1203ab9cd27fSMel Chenfor.body.preheader:                               ; preds = %entry
  ; Trip count is the 32-bit %n widened to the 64-bit induction type.
1204ab9cd27fSMel Chen  %wide.trip.count = zext i32 %n to i64
1205ab9cd27fSMel Chen  br label %for.body
1206ab9cd27fSMel Chen
1207ab9cd27fSMel Chenfor.body:                                         ; preds = %for.body.preheader, %for.body
1208ab9cd27fSMel Chen  %iv = phi i64 [ 0, %for.body.preheader ], [ %inc, %for.body ]
1209ab9cd27fSMel Chen  %rdx = phi i16 [ %start, %for.body.preheader ], [ %cond, %for.body ]
1210ab9cd27fSMel Chen  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
1211ab9cd27fSMel Chen  %0 = load i32, ptr %arrayidx, align 4
1212ab9cd27fSMel Chen  %arrayidx2 = getelementptr inbounds i32, ptr %b, i64 %iv
1213ab9cd27fSMel Chen  %1 = load i32, ptr %arrayidx2, align 4
1214ab9cd27fSMel Chen  %cmp3 = icmp sgt i32 %0, %1
  ; The guard bounds %iv by a 32-bit trip count, but the destination here is
  ; only 16 bits wide, so the truncated index can wrap once %iv grows past the
  ; i16 range.
1215ab9cd27fSMel Chen  %2 = trunc i64 %iv to i16
  ; Take the truncated index when a[iv] > b[iv]; otherwise carry the previous
  ; result forward.
1216ab9cd27fSMel Chen  %cond = select i1 %cmp3, i16 %2, i16 %rdx
1217ab9cd27fSMel Chen  %inc = add nuw nsw i64 %iv, 1
1218ab9cd27fSMel Chen  %exitcond.not = icmp eq i64 %inc, %wide.trip.count
1219ab9cd27fSMel Chen  br i1 %exitcond.not, label %exit, label %for.body
1220ab9cd27fSMel Chen
1221ab9cd27fSMel Chenexit:                                             ; preds = %for.body, %entry
  ; %start is returned unchanged when the guard skips the loop entirely.
1222ab9cd27fSMel Chen  %rdx.0.lcssa = phi i16 [ %start, %entry ], [ %cond, %for.body ]
1223ab9cd27fSMel Chen  ret i16 %rdx.0.lcssa
1224ab9cd27fSMel Chen}
1225*b3cba9beSMel Chen;.
1226*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1227*b3cba9beSMel Chen; CHECK-VF4IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1228*b3cba9beSMel Chen; CHECK-VF4IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1229*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1230*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1231*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1232*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1233*b3cba9beSMel Chen; CHECK-VF4IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1234*b3cba9beSMel Chen;.
1235*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1236*b3cba9beSMel Chen; CHECK-VF4IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1237*b3cba9beSMel Chen; CHECK-VF4IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1238*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1239*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1240*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1241*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1242*b3cba9beSMel Chen; CHECK-VF4IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1243*b3cba9beSMel Chen;.
1244*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1245*b3cba9beSMel Chen; CHECK-VF1IC4: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1246*b3cba9beSMel Chen; CHECK-VF1IC4: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1247*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
1248*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1249*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
1250*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1251*b3cba9beSMel Chen; CHECK-VF1IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
1252*b3cba9beSMel Chen;.
1253