xref: /llvm-project/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll (revision 1de3dc7d23dd6b856efad3a3a04f2396328726d7)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2
3; To test epilogue-vectorization we need to make sure that the vectorizer actually vectorizes the loop.
4; Without a target triple this becomes difficult, unless we force vectorization through user hints.
5; Currently user provided vectorization hints prevent epilogue vectorization unless the forced
6; VF is the same as the epilogue vectorization VF. To make these tests target independent we'll use a
7; trick where both VFs are forced to be the same value. Mismatching VFs are tested in target specific tests.
8; RUN: opt -passes='loop-vectorize' -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S %s | FileCheck %s
9
10; Some simpler cases are found profitable even without triple or user hints.
11; RUN: opt -passes='loop-vectorize' -enable-epilogue-vectorization -epilogue-vectorization-force-VF=2 -S %s | FileCheck --check-prefix=CHECK-PROFITABLE-BY-DEFAULT %s
12
13target datalayout = "e-m:e-i64:64-n32:64-v128:128:128"
14
15define dso_local void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 signext %N) {
16; CHECK-LABEL: @f1(
17; CHECK-NEXT:  entry:
18; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
19; CHECK-NEXT:    br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
20; CHECK:       iter.check:
21; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
22; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
23; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
24; CHECK:       vector.main.loop.iter.check:
25; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
26; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
27; CHECK:       vector.ph:
28; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
29; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
30; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
31; CHECK:       vector.body:
32; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
33; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
34; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[BB:%.*]], i64 [[TMP0]]
35; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
36; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
37; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[CC:%.*]], i64 [[TMP0]]
38; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
39; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
40; CHECK-NEXT:    [[TMP5:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD2]]
41; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[AA:%.*]], i64 [[TMP0]]
42; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 0
43; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[TMP7]], align 4
44; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
45; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
46; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
47; CHECK:       middle.block:
48; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
49; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
50; CHECK:       vec.epilog.iter.check:
51; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
52; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
53; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
54; CHECK:       vec.epilog.ph:
55; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
56; CHECK-NEXT:    [[N_MOD_VF3:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
57; CHECK-NEXT:    [[N_VEC4:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF3]]
58; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
59; CHECK:       vec.epilog.vector.body:
60; CHECK-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
61; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX6]], 0
62; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[TMP9]]
63; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0
64; CHECK-NEXT:    [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP11]], align 4
65; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[TMP9]]
66; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
67; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP13]], align 4
68; CHECK-NEXT:    [[TMP14:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD8]]
69; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[TMP9]]
70; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP15]], i32 0
71; CHECK-NEXT:    store <4 x float> [[TMP14]], ptr [[TMP16]], align 4
72; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4
73; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]]
74; CHECK-NEXT:    br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
75; CHECK:       vec.epilog.middle.block:
76; CHECK-NEXT:    [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC4]]
77; CHECK-NEXT:    br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
78; CHECK:       vec.epilog.scalar.ph:
79; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
80; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
81; CHECK:       for.body:
82; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
83; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDVARS_IV]]
84; CHECK-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4
85; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDVARS_IV]]
86; CHECK-NEXT:    [[TMP19:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
87; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP18]], [[TMP19]]
88; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDVARS_IV]]
89; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX4]], align 4
90; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
91; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
92; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]]
93; CHECK:       for.end.loopexit:
94; CHECK-NEXT:    br label [[FOR_END]]
95; CHECK:       for.end:
96; CHECK-NEXT:    ret void
97;
98; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f1(
99; CHECK-PROFITABLE-BY-DEFAULT-NEXT:  entry:
100; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
101; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
102; CHECK-PROFITABLE-BY-DEFAULT:       for.body.preheader:
103; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
104; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
105; CHECK-PROFITABLE-BY-DEFAULT:       for.body:
106; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
107; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[BB:%.*]], i64 [[INDVARS_IV]]
108; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
109; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[CC:%.*]], i64 [[INDVARS_IV]]
110; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
111; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP0]], [[TMP1]]
112; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, ptr [[AA:%.*]], i64 [[INDVARS_IV]]
113; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store float [[ADD]], ptr [[ARRAYIDX4]], align 4
114; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
115; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
116; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
117; CHECK-PROFITABLE-BY-DEFAULT:       for.end.loopexit:
118; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[FOR_END]]
119; CHECK-PROFITABLE-BY-DEFAULT:       for.end:
120; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    ret void
121;
122entry:
123  %cmp1 = icmp sgt i32 %N, 0
124  br i1 %cmp1, label %for.body.preheader, label %for.end
125
126for.body.preheader:                               ; preds = %entry
127  %wide.trip.count = zext i32 %N to i64
128  br label %for.body
129
130for.body:                                         ; preds = %for.body.preheader, %for.body
131  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
132  %arrayidx = getelementptr inbounds float, ptr %bb, i64 %indvars.iv
133  %0 = load float, ptr %arrayidx, align 4
134  %arrayidx2 = getelementptr inbounds float, ptr %cc, i64 %indvars.iv
135  %1 = load float, ptr %arrayidx2, align 4
136  %add = fadd fast float %0, %1
137  %arrayidx4 = getelementptr inbounds float, ptr %aa, i64 %indvars.iv
138  store float %add, ptr %arrayidx4, align 4
139  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
140  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
141  br i1 %exitcond, label %for.body, label %for.end.loopexit
142
143for.end.loopexit:                                 ; preds = %for.body
144  br label %for.end
145
146for.end:                                          ; preds = %for.end.loopexit, %entry
147  ret void
148}
149
150define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n) {
151; CHECK-LABEL: @f2(
152; CHECK-NEXT:  entry:
153; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 1
154; CHECK-NEXT:    br i1 [[CMP1]], label [[ITER_CHECK:%.*]], label [[FOR_END:%.*]]
155; CHECK:       iter.check:
156; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
157; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
158; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
159; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
160; CHECK:       vector.scevcheck:
161; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i64 [[WIDE_TRIP_COUNT]], -1
162; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32
163; CHECK-NEXT:    [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP2]])
164; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
165; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
166; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[TMP0]], [[MUL_RESULT]]
167; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[TMP0]]
168; CHECK-NEXT:    [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW]]
169; CHECK-NEXT:    [[TMP6:%.*]] = icmp ugt i64 [[TMP1]], 4294967295
170; CHECK-NEXT:    [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
171; CHECK-NEXT:    br i1 [[TMP7]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
172; CHECK:       vector.main.loop.iter.check:
173; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
174; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
175; CHECK:       vector.ph:
176; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
177; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
178; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
179; CHECK:       vector.body:
180; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
181; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 0
182; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
183; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], 0
184; CHECK-NEXT:    [[TMP10:%.*]] = xor i32 [[TMP8]], -1
185; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], [[N]]
186; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
187; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP12]]
188; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 0
189; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 -3
190; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
191; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
192; CHECK-NEXT:    [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00)
193; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP9]]
194; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
195; CHECK-NEXT:    store <4 x float> [[TMP16]], ptr [[TMP18]], align 4
196; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
197; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
198; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
199; CHECK:       middle.block:
200; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
201; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
202; CHECK:       vec.epilog.iter.check:
203; CHECK-NEXT:    [[IND_END4:%.*]] = trunc i64 [[N_VEC]] to i32
204; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
205; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
206; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
207; CHECK:       vec.epilog.ph:
208; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
209; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
210; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]
211; CHECK-NEXT:    [[IND_END:%.*]] = trunc i64 [[N_VEC3]] to i32
212; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
213; CHECK:       vec.epilog.vector.body:
214; CHECK-NEXT:    [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
215; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX7]], 0
216; CHECK-NEXT:    [[OFFSET_IDX8:%.*]] = trunc i64 [[INDEX7]] to i32
217; CHECK-NEXT:    [[TMP20:%.*]] = add i32 [[OFFSET_IDX8]], 0
218; CHECK-NEXT:    [[TMP22:%.*]] = xor i32 [[TMP20]], -1
219; CHECK-NEXT:    [[TMP23:%.*]] = add i32 [[TMP22]], [[N]]
220; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
221; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP24]]
222; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 0
223; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 -3
224; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
225; CHECK-NEXT:    [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
226; CHECK-NEXT:    [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00)
227; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
228; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, ptr [[TMP29]], i32 0
229; CHECK-NEXT:    store <4 x float> [[TMP28]], ptr [[TMP30]], align 4
230; CHECK-NEXT:    [[INDEX_NEXT11]] = add nuw i64 [[INDEX7]], 4
231; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC3]]
232; CHECK-NEXT:    br i1 [[TMP31]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
233; CHECK:       vec.epilog.middle.block:
234; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]
235; CHECK-NEXT:    br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
236; CHECK:       vec.epilog.scalar.ph:
237; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
238; CHECK-NEXT:    [[BC_RESUME_VAL5:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ]
239; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
240; CHECK:       for.body:
241; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
242; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
243; CHECK-NEXT:    [[TMP32:%.*]] = xor i32 [[I_014]], -1
244; CHECK-NEXT:    [[SUB2:%.*]] = add i32 [[TMP32]], [[N]]
245; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64
246; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IDXPROM]]
247; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[ARRAYIDX]], align 4
248; CHECK-NEXT:    [[CONV3:%.*]] = fadd fast float [[TMP33]], 1.000000e+00
249; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
250; CHECK-NEXT:    store float [[CONV3]], ptr [[ARRAYIDX5]], align 4
251; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
252; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_014]], 1
253; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
254; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]]
255; CHECK:       for.end.loopexit:
256; CHECK-NEXT:    br label [[FOR_END]]
257; CHECK:       for.end:
258; CHECK-NEXT:    ret i32 0
259;
260; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f2(
261; CHECK-PROFITABLE-BY-DEFAULT-NEXT:  entry:
262; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 1
263; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
264; CHECK-PROFITABLE-BY-DEFAULT:       for.body.preheader:
265; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
266; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[TMP0]] to i64
267; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
268; CHECK-PROFITABLE-BY-DEFAULT:       for.body:
269; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
270; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[I_014:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
271; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP1:%.*]] = xor i32 [[I_014]], -1
272; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[SUB2:%.*]] = add i32 [[TMP1]], [[N]]
273; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IDXPROM:%.*]] = sext i32 [[SUB2]] to i64
274; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[IDXPROM]]
275; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4
276; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[CONV3:%.*]] = fadd fast float [[TMP2]], 1.000000e+00
277; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDVARS_IV]]
278; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store float [[CONV3]], ptr [[ARRAYIDX5]], align 4
279; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
280; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INC]] = add nuw nsw i32 [[I_014]], 1
281; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
282; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
283; CHECK-PROFITABLE-BY-DEFAULT:       for.end.loopexit:
284; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[FOR_END]]
285; CHECK-PROFITABLE-BY-DEFAULT:       for.end:
286; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    ret i32 0
287;
288entry:
289  %cmp1 = icmp sgt i32 %n, 1
290  br i1 %cmp1, label %for.body.preheader, label %for.end
291
292for.body.preheader:                               ; preds = %entry
293  %0 = add i32 %n, -1
294  %wide.trip.count = zext i32 %0 to i64
295  br label %for.body
296
297for.body:                                         ; preds = %for.body.preheader, %for.body
298  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
299  %i.014 = phi i32 [ 0, %for.body.preheader ], [ %inc, %for.body ]
300  %1 = xor i32 %i.014, -1
301  %sub2 = add i32 %1, %n
302  %idxprom = sext i32 %sub2 to i64
303  %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
304  %2 = load float, ptr %arrayidx, align 4
305  %conv3 = fadd fast float %2, 1.000000e+00
306  %arrayidx5 = getelementptr inbounds float, ptr %A, i64 %indvars.iv
307  store float %conv3, ptr %arrayidx5, align 4
308  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
309  %inc = add nuw nsw i32 %i.014, 1
310  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
311  br i1 %exitcond, label %for.body, label %for.end.loopexit
312
313for.end.loopexit:                                 ; preds = %for.body
314  br label %for.end
315
316for.end:                                          ; preds = %for.end.loopexit, %entry
317  ret i32 0
318}
319
320define void @f3(ptr noalias %A, i64 %n) {
321; CHECK-LABEL: @f3(
322; CHECK-NEXT:  iter.check:
323; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
324; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
325; CHECK:       vector.main.loop.iter.check:
326; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4
327; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
328; CHECK:       vector.ph:
329; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
330; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
331; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
332; CHECK:       vector.body:
333; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
334; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
335; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
336; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
337; CHECK-NEXT:    store <4 x i8> splat (i8 1), ptr [[TMP2]], align 1
338; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
339; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
340; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
341; CHECK:       middle.block:
342; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
343; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
344; CHECK:       vec.epilog.iter.check:
345; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
346; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
347; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
348; CHECK:       vec.epilog.ph:
349; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
350; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[N]], 4
351; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
352; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
353; CHECK:       vec.epilog.vector.body:
354; CHECK-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
355; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX5]], 0
356; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
357; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
358; CHECK-NEXT:    store <4 x i8> splat (i8 1), ptr [[TMP6]], align 1
359; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 4
360; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
361; CHECK-NEXT:    br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
362; CHECK:       vec.epilog.middle.block:
363; CHECK-NEXT:    [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
364; CHECK-NEXT:    br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
365; CHECK:       vec.epilog.scalar.ph:
366; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
367; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
368; CHECK:       for.body:
369; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
370; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
371; CHECK-NEXT:    store i8 1, ptr [[ARRAYIDX]], align 1
372; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
373; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
374; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP10:![0-9]+]]
375; CHECK:       for.end.loopexit:
376; CHECK-NEXT:    br label [[FOR_END:%.*]]
377; CHECK:       for.end:
378; CHECK-NEXT:    ret void
379;
380; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f3(
381; CHECK-PROFITABLE-BY-DEFAULT-NEXT:  iter.check:
382; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
383; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
384; CHECK-PROFITABLE-BY-DEFAULT:       vector.main.loop.iter.check:
385; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4
386; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
387; CHECK-PROFITABLE-BY-DEFAULT:       vector.ph:
388; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
389; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
390; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
391; CHECK-PROFITABLE-BY-DEFAULT:       vector.body:
392; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
393; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
394; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
395; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
396; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <4 x i8> splat (i8 1), ptr [[TMP2]], align 1
397; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
398; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
399; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
400; CHECK-PROFITABLE-BY-DEFAULT:       middle.block:
401; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
402; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
403; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.iter.check:
404; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
405; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
406; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
407; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.ph:
408; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
409; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2
410; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]]
411; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
412; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.vector.body:
413; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
414; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX5]], 0
415; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP4]]
416; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
417; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <2 x i8> splat (i8 1), ptr [[TMP6]], align 1
418; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 2
419; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
420; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
421; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.middle.block:
422; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]]
423; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[CMP_N4]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]]
424; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.scalar.ph:
425; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
426; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[FOR_BODY:%.*]]
427; CHECK-PROFITABLE-BY-DEFAULT:       for.body:
428; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
429; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
430; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store i8 1, ptr [[ARRAYIDX]], align 1
431; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
432; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]]
433; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]]
434; CHECK-PROFITABLE-BY-DEFAULT:       for.end.loopexit:
435; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[FOR_END:%.*]]
436; CHECK-PROFITABLE-BY-DEFAULT:       for.end:
437; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    ret void
438;
439entry:
440  br label %for.body
441
442for.body:                                         ; preds = %for.body.preheader, %for.body
443  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
444  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv
445  store i8 1, ptr %arrayidx, align 1
446  %iv.next = add nuw nsw i64 %iv, 1
447  %exitcond = icmp ne i64 %iv.next, %n
448  br i1 %exitcond, label %for.body, label %for.end.loopexit
449
450for.end.loopexit:                                 ; preds = %for.body
451  br label %for.end
452
453for.end:                                          ; preds = %for.end.loopexit, %entry
454  ret void
455}
456
457; Test case for PR54745.
458define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst) {
459; CHECK-LABEL: @induction_resume_value_requires_non_trivial_scev_expansion(
460; CHECK-NEXT:  entry:
461; CHECK-NEXT:    br label [[ITER_CHECK:%.*]]
462; CHECK:       iter.check:
463; CHECK-NEXT:    [[INDUCTION_IV:%.*]] = phi i8 [ [[INDUCTION_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ -56, [[ENTRY:%.*]] ]
464; CHECK-NEXT:    [[INDVAR:%.*]] = phi i8 [ [[INDVAR_NEXT:%.*]], [[OUTER_LATCH]] ], [ 0, [[ENTRY]] ]
465; CHECK-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH]] ]
466; CHECK-NEXT:    [[P2:%.*]] = phi i32 [ -202, [[ENTRY]] ], [ [[ADD:%.*]], [[OUTER_LATCH]] ]
467; CHECK-NEXT:    [[TMP0:%.*]] = mul i8 [[INDVAR]], -1
468; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[TMP0]], -3
469; CHECK-NEXT:    [[TRUNC_IV:%.*]] = trunc i64 [[OUTER_IV]] to i32
470; CHECK-NEXT:    [[ADD]] = add i32 [[P2]], [[TRUNC_IV]]
471; CHECK-NEXT:    [[TRUNC_ADD:%.*]] = trunc i32 [[ADD]] to i8
472; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
473; CHECK:       vector.main.loop.iter.check:
474; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
475; CHECK:       vector.ph:
476; CHECK-NEXT:    [[IND_END:%.*]] = mul i8 84, [[INDUCTION_IV]]
477; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
478; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
479; CHECK-NEXT:    [[TMP2:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT]]
480; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]]
481; CHECK-NEXT:    [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4
482; CHECK-NEXT:    [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0
483; CHECK-NEXT:    [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
484; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
485; CHECK:       vector.body:
486; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
487; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
488; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
489; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
490; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP4]]
491; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
492; CHECK-NEXT:    store <4 x i8> [[VEC_IND]], ptr [[TMP6]], align 1
493; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
494; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], [[DOTSPLAT2]]
495; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 84
496; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
497; CHECK:       middle.block:
498; CHECK-NEXT:    br i1 true, label [[OUTER_LATCH]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
499; CHECK:       vec.epilog.iter.check:
500; CHECK-NEXT:    [[IND_END5:%.*]] = mul i8 84, [[INDUCTION_IV]]
501; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
502; CHECK:       vec.epilog.ph:
503; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
504; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
505; CHECK-NEXT:    [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]]
506; CHECK-NEXT:    [[DOTSPLATINSERT8:%.*]] = insertelement <4 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
507; CHECK-NEXT:    [[DOTSPLAT9:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT8]], <4 x i8> poison, <4 x i32> zeroinitializer
508; CHECK-NEXT:    [[DOTSPLATINSERT10:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
509; CHECK-NEXT:    [[DOTSPLAT11:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT10]], <4 x i8> poison, <4 x i32> zeroinitializer
510; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT11]]
511; CHECK-NEXT:    [[INDUCTION12:%.*]] = add <4 x i8> [[DOTSPLAT9]], [[TMP8]]
512; CHECK-NEXT:    [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 4
513; CHECK-NEXT:    [[DOTSPLATINSERT13:%.*]] = insertelement <4 x i8> poison, i8 [[TMP9]], i64 0
514; CHECK-NEXT:    [[DOTSPLAT14:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT13]], <4 x i8> poison, <4 x i32> zeroinitializer
515; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
516; CHECK:       vec.epilog.vector.body:
517; CHECK-NEXT:    [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
518; CHECK-NEXT:    [[VEC_IND15:%.*]] = phi <4 x i8> [ [[INDUCTION12]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
519; CHECK-NEXT:    [[OFFSET_IDX17:%.*]] = add i64 1, [[INDEX7]]
520; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX17]], 0
521; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP10]]
522; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
523; CHECK-NEXT:    store <4 x i8> [[VEC_IND15]], ptr [[TMP12]], align 1
524; CHECK-NEXT:    [[INDEX_NEXT18]] = add nuw i64 [[INDEX7]], 4
525; CHECK-NEXT:    [[VEC_IND_NEXT16]] = add <4 x i8> [[VEC_IND15]], [[DOTSPLAT14]]
526; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT18]], 84
527; CHECK-NEXT:    br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
528; CHECK:       vec.epilog.middle.block:
529; CHECK-NEXT:    br i1 true, label [[OUTER_LATCH]], label [[VEC_EPILOG_SCALAR_PH]]
530; CHECK:       vec.epilog.scalar.ph:
531; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i64 [ 85, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1, [[ITER_CHECK]] ], [ 85, [[VEC_EPILOG_ITER_CHECK]] ]
532; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i8 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ]
533; CHECK-NEXT:    br label [[INNER:%.*]]
534; CHECK:       inner:
535; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[INNER]] ]
536; CHECK-NEXT:    [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[INNER]] ]
537; CHECK-NEXT:    [[IV_2_NEXT]] = sub i8 [[IV_2]], [[TRUNC_ADD]]
538; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]]
539; CHECK-NEXT:    store i8 [[IV_2]], ptr [[GEP_DST]], align 1
540; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
541; CHECK-NEXT:    [[EC:%.*]] = icmp ugt i64 [[IV]], 83
542; CHECK-NEXT:    br i1 [[EC]], label [[OUTER_LATCH]], label [[INNER]], !llvm.loop [[LOOP13:![0-9]+]]
543; CHECK:       outer.latch:
544; CHECK-NEXT:    [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
545; CHECK-NEXT:    [[INDVAR_NEXT]] = add i8 [[INDVAR]], 1
546; CHECK-NEXT:    [[INDUCTION_IV_NEXT]] = add i8 [[INDUCTION_IV]], [[TMP1]]
547; CHECK-NEXT:    br label [[ITER_CHECK]]
548;
549; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @induction_resume_value_requires_non_trivial_scev_expansion(
550; CHECK-PROFITABLE-BY-DEFAULT-NEXT:  entry:
551; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[ITER_CHECK:%.*]]
552; CHECK-PROFITABLE-BY-DEFAULT:       iter.check:
553; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDUCTION_IV:%.*]] = phi i8 [ [[INDUCTION_IV_NEXT:%.*]], [[OUTER_LATCH:%.*]] ], [ -56, [[ENTRY:%.*]] ]
554; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDVAR:%.*]] = phi i8 [ [[INDVAR_NEXT:%.*]], [[OUTER_LATCH]] ], [ 0, [[ENTRY]] ]
555; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], [[OUTER_LATCH]] ]
556; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[P2:%.*]] = phi i32 [ -202, [[ENTRY]] ], [ [[ADD:%.*]], [[OUTER_LATCH]] ]
557; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP0:%.*]] = mul i8 [[INDVAR]], -1
558; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP1:%.*]] = add i8 [[TMP0]], -3
559; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TRUNC_IV:%.*]] = trunc i64 [[OUTER_IV]] to i32
560; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ADD]] = add i32 [[P2]], [[TRUNC_IV]]
561; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TRUNC_ADD:%.*]] = trunc i32 [[ADD]] to i8
562; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
563; CHECK-PROFITABLE-BY-DEFAULT:       vector.main.loop.iter.check:
564; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
565; CHECK-PROFITABLE-BY-DEFAULT:       vector.ph:
566; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IND_END:%.*]] = mul i8 84, [[INDUCTION_IV]]
567; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[INDUCTION_IV]], i64 0
568; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
569; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP2:%.*]] = mul <4 x i8> <i8 0, i8 1, i8 2, i8 3>, [[DOTSPLAT]]
570; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDUCTION:%.*]] = add <4 x i8> zeroinitializer, [[TMP2]]
571; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP3:%.*]] = mul i8 [[INDUCTION_IV]], 4
572; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[TMP3]], i64 0
573; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT2:%.*]] = shufflevector <4 x i8> [[DOTSPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
574; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
575; CHECK-PROFITABLE-BY-DEFAULT:       vector.body:
576; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
577; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
578; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
579; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
580; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP4]]
581; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
582; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <4 x i8> [[VEC_IND]], ptr [[TMP6]], align 1
583; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
584; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], [[DOTSPLAT2]]
585; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 84
586; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
587; CHECK-PROFITABLE-BY-DEFAULT:       middle.block:
588; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 true, label [[OUTER_LATCH]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
589; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.iter.check:
590; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IND_END5:%.*]] = mul i8 84, [[INDUCTION_IV]]
591; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
592; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.ph:
593; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
594; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
595; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IND_END4:%.*]] = mul i8 84, [[INDUCTION_IV]]
596; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT8:%.*]] = insertelement <2 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
597; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT9:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT8]], <2 x i8> poison, <2 x i32> zeroinitializer
598; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT10:%.*]] = insertelement <2 x i8> poison, i8 [[INDUCTION_IV]], i64 0
599; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT11:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT10]], <2 x i8> poison, <2 x i32> zeroinitializer
600; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP8:%.*]] = mul <2 x i8> <i8 0, i8 1>, [[DOTSPLAT11]]
601; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDUCTION12:%.*]] = add <2 x i8> [[DOTSPLAT9]], [[TMP8]]
602; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP9:%.*]] = mul i8 [[INDUCTION_IV]], 2
603; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT13:%.*]] = insertelement <2 x i8> poison, i8 [[TMP9]], i64 0
604; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT14:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT13]], <2 x i8> poison, <2 x i32> zeroinitializer
605; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
606; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.vector.body:
607; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT18:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
608; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND15:%.*]] = phi <2 x i8> [ [[INDUCTION12]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
609; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[OFFSET_IDX17:%.*]] = add i64 1, [[INDEX7]]
610; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX17]], 0
611; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[TMP10]]
612; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
613; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <2 x i8> [[VEC_IND15]], ptr [[TMP12]], align 1
614; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT18]] = add nuw i64 [[INDEX7]], 2
615; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND_NEXT16]] = add <2 x i8> [[VEC_IND15]], [[DOTSPLAT14]]
616; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT18]], 84
617; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
618; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.middle.block:
619; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 true, label [[OUTER_LATCH]], label [[VEC_EPILOG_SCALAR_PH]]
620; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.scalar.ph:
621; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i64 [ 85, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1, [[ITER_CHECK]] ], [ 85, [[VEC_EPILOG_ITER_CHECK]] ]
622; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i8 [ [[IND_END4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK]] ], [ [[IND_END5]], [[VEC_EPILOG_ITER_CHECK]] ]
623; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[INNER:%.*]]
624; CHECK-PROFITABLE-BY-DEFAULT:       inner:
625; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL3]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[INNER]] ]
626; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV_2:%.*]] = phi i8 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[INNER]] ]
627; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV_2_NEXT]] = sub i8 [[IV_2]], [[TRUNC_ADD]]
628; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]]
629; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store i8 [[IV_2]], ptr [[GEP_DST]], align 1
630; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
631; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[EC:%.*]] = icmp ugt i64 [[IV]], 83
632; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[EC]], label [[OUTER_LATCH]], label [[INNER]], !llvm.loop [[LOOP7:![0-9]+]]
633; CHECK-PROFITABLE-BY-DEFAULT:       outer.latch:
634; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
635; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDVAR_NEXT]] = add i8 [[INDVAR]], 1
636; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDUCTION_IV_NEXT]] = add i8 [[INDUCTION_IV]], [[TMP1]]
637; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[ITER_CHECK]]
638;
639entry:
640  br label %outer.header
641
642outer.header:
643  %outer.iv = phi i64 [ 2, %entry ], [ %outer.iv.next, %outer.latch ]
644  %p2 = phi i32 [ -202, %entry ], [ %add, %outer.latch ]
645  %trunc.iv  = trunc i64 %outer.iv to i32
646  %add = add i32 %p2, %trunc.iv
647  %trunc.add = trunc i32 %add to i8
648  br label %inner
649
650inner:
651  %iv = phi i64 [ 1, %outer.header ], [ %iv.next, %inner ]
652  %iv.2 = phi i8 [ 0, %outer.header ], [ %iv.2.next, %inner ]
653  %iv.2.next = sub i8 %iv.2, %trunc.add
654  %gep.dst = getelementptr inbounds i8, ptr %dst, i64 %iv
655  store i8 %iv.2, ptr %gep.dst
656  %iv.next = add nuw nsw i64 %iv, 1
657  %ec = icmp ugt i64 %iv, 83
658  br i1 %ec, label %outer.latch, label %inner
659
660outer.latch:
661  %outer.iv.next = add nuw nsw i64 %outer.iv, 1
662  br label %outer.header
663}
664
665; Check handling of widened/truncated inductions.
666define void @f4(ptr noalias %A, i32 signext %n) {
667; CHECK-LABEL: @f4(
668; CHECK-NEXT:  iter.check:
669; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N:%.*]] to i64
670; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
671; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
672; CHECK:       vector.main.loop.iter.check:
673; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
674; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
675; CHECK:       vector.ph:
676; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
677; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
678; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
679; CHECK:       vector.body:
680; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
681; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
682; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
683; CHECK-NEXT:    [[TMP1:%.*]] = trunc <4 x i32> [[VEC_IND]] to <4 x i8>
684; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
685; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
686; CHECK-NEXT:    store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1
687; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
688; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
689; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
690; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
691; CHECK:       middle.block:
692; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
693; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
694; CHECK:       vec.epilog.iter.check:
695; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
696; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
697; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
698; CHECK:       vec.epilog.ph:
699; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
700; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
701; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]
702; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i32
703; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
704; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
705; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
706; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
707; CHECK:       vec.epilog.vector.body:
708; CHECK-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
709; CHECK-NEXT:    [[VEC_IND7:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
710; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX6]], 0
711; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i32> [[VEC_IND7]] to <4 x i8>
712; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
713; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
714; CHECK-NEXT:    store <4 x i8> [[TMP7]], ptr [[TMP9]], align 1
715; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4
716; CHECK-NEXT:    [[VEC_IND_NEXT8]] = add <4 x i32> [[VEC_IND7]], splat (i32 4)
717; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
718; CHECK-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
719; CHECK:       vec.epilog.middle.block:
720; CHECK-NEXT:    [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]
721; CHECK-NEXT:    br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
722; CHECK:       vec.epilog.scalar.ph:
723; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
724; CHECK-NEXT:    br label [[LOOP:%.*]]
725; CHECK:       loop:
726; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
727; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[IV]] to i32
728; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP11]] to i8
729; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
730; CHECK-NEXT:    store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
731; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
732; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]
733; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP16:![0-9]+]]
734; CHECK:       exit:
735; CHECK-NEXT:    ret void
736;
737; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @f4(
738; CHECK-PROFITABLE-BY-DEFAULT-NEXT:  iter.check:
739; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N:%.*]] to i64
740; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 2
741; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
742; CHECK-PROFITABLE-BY-DEFAULT:       vector.main.loop.iter.check:
743; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
744; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
745; CHECK-PROFITABLE-BY-DEFAULT:       vector.ph:
746; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
747; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
748; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
749; CHECK-PROFITABLE-BY-DEFAULT:       vector.body:
750; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
751; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
752; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
753; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP1:%.*]] = trunc <4 x i32> [[VEC_IND]] to <4 x i8>
754; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
755; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
756; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <4 x i8> [[TMP1]], ptr [[TMP3]], align 1
757; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
758; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
759; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
760; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
761; CHECK-PROFITABLE-BY-DEFAULT:       middle.block:
762; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
763; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
764; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.iter.check:
765; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
766; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
767; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
768; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.ph:
769; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
770; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 2
771; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[N_VEC3:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF2]]
772; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i32
773; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i64 0
774; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
775; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
776; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
777; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.vector.body:
778; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
779; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
780; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX6]], 0
781; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP7:%.*]] = trunc <2 x i32> [[VEC_IND7]] to <2 x i8>
782; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP6]]
783; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
784; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store <2 x i8> [[TMP7]], ptr [[TMP9]], align 1
785; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2
786; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], splat (i32 2)
787; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
788; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
789; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.middle.block:
790; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]]
791; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[CMP_N5]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
792; CHECK-PROFITABLE-BY-DEFAULT:       vec.epilog.scalar.ph:
793; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
794; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[LOOP:%.*]]
795; CHECK-PROFITABLE-BY-DEFAULT:       loop:
796; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
797; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[TMP11:%.*]] = trunc i64 [[IV]] to i32
798; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP11]] to i8
799; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
800; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store i8 [[CONV]], ptr [[ARRAYIDX]], align 1
801; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
802; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]]
803; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP10:![0-9]+]]
804; CHECK-PROFITABLE-BY-DEFAULT:       exit:
805; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    ret void
806;
807entry:
808  %wide.trip.count = zext i32 %n to i64
809  br label %loop
810
811loop:
812  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
813  %0 = trunc i64 %iv to i32
814  %conv = trunc i32 %0 to i8
815  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv
816  store i8 %conv, ptr %arrayidx, align 1
817  %iv.next = add nuw nsw i64 %iv, 1
818  %exitcond = icmp ne i64 %iv.next, %wide.trip.count
819  br i1 %exitcond, label %loop, label %exit
820
821exit:
822  ret void
823}
824
825define void @multiple_ivs_wide(ptr %dst) {
826; CHECK-LABEL: @multiple_ivs_wide(
827; CHECK-NEXT:  iter.check:
828; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
829; CHECK:       vector.main.loop.iter.check:
830; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
831; CHECK:       vector.ph:
832; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
833; CHECK:       vector.body:
834; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
835; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 -64, i32 -62, i32 -60, i32 -58>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
836; CHECK-NEXT:    [[TMP0:%.*]] = mul i32 [[INDEX]], 2
837; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 0
838; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0]], 2
839; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], 4
840; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], 6
841; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 2)
842; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[TMP1]]
843; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP2]]
844; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP3]]
845; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
846; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[TMP5]], i32 0
847; CHECK-NEXT:    store i32 [[TMP10]], ptr [[TMP6]], align 4
848; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i32> [[TMP5]], i32 1
849; CHECK-NEXT:    store i32 [[TMP11]], ptr [[TMP7]], align 4
850; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
851; CHECK-NEXT:    store i32 [[TMP12]], ptr [[TMP8]], align 4
852; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
853; CHECK-NEXT:    store i32 [[TMP13]], ptr [[TMP9]], align 4
854; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
855; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 8)
856; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 64
857; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
858; CHECK:       middle.block:
859; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
860; CHECK:       vec.epilog.iter.check:
861; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
862; CHECK:       vec.epilog.ph:
863; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 64, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
864; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 64, [[VEC_EPILOG_ITER_CHECK]] ], [ -64, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
865; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0
866; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
867; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 2, i32 4, i32 6>
868; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
869; CHECK:       vec.epilog.vector.body:
870; CHECK-NEXT:    [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
871; CHECK-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
872; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i32 [[INDEX1]], 2
873; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 0
874; CHECK-NEXT:    [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 2
875; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 4
876; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], 6
877; CHECK-NEXT:    [[TMP19:%.*]] = add <4 x i32> [[VEC_IND2]], splat (i32 2)
878; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP15]]
879; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP16]]
880; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP17]]
881; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP18]]
882; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP19]], i32 0
883; CHECK-NEXT:    store i32 [[TMP24]], ptr [[TMP20]], align 4
884; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[TMP19]], i32 1
885; CHECK-NEXT:    store i32 [[TMP25]], ptr [[TMP21]], align 4
886; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP19]], i32 2
887; CHECK-NEXT:    store i32 [[TMP26]], ptr [[TMP22]], align 4
888; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP19]], i32 3
889; CHECK-NEXT:    store i32 [[TMP27]], ptr [[TMP23]], align 4
890; CHECK-NEXT:    [[INDEX_NEXT4]] = add nuw i32 [[INDEX1]], 4
891; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], splat (i32 8)
892; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i32 [[INDEX_NEXT4]], 64
893; CHECK-NEXT:    br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
894; CHECK:       vec.epilog.middle.block:
895; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
896; CHECK:       vec.epilog.scalar.ph:
897; CHECK-NEXT:    [[BC_RESUME_VAL5:%.*]] = phi i32 [ 128, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 128, [[VEC_EPILOG_ITER_CHECK]] ]
898; CHECK-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi i32 [ 64, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ -64, [[ITER_CHECK]] ], [ 64, [[VEC_EPILOG_ITER_CHECK]] ]
899; CHECK-NEXT:    br label [[LOOP:%.*]]
900; CHECK:       loop:
901; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
902; CHECK-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
903; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 2
904; CHECK-NEXT:    [[IV_2_NEXT]] = add i32 [[IV_2]], 2
905; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
906; CHECK-NEXT:    store i32 [[IV_2_NEXT]], ptr [[GEP]], align 4
907; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 128
908; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
909; CHECK:       exit:
910; CHECK-NEXT:    ret void
911;
912; CHECK-PROFITABLE-BY-DEFAULT-LABEL: @multiple_ivs_wide(
913; CHECK-PROFITABLE-BY-DEFAULT-NEXT:  entry:
914; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br label [[LOOP:%.*]]
915; CHECK-PROFITABLE-BY-DEFAULT:       loop:
916; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
917; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV_2:%.*]] = phi i32 [ -64, [[ENTRY]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
918; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 2
919; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[IV_2_NEXT]] = add i32 [[IV_2]], 2
920; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[IV]]
921; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    store i32 [[IV_2_NEXT]], ptr [[GEP]], align 4
922; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 128
923; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
924; CHECK-PROFITABLE-BY-DEFAULT:       exit:
925; CHECK-PROFITABLE-BY-DEFAULT-NEXT:    ret void
926;
927entry:
928  br label %loop
929
930loop:
931  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
932  %iv.2 = phi i32 [ -64, %entry ], [ %iv.2.next, %loop ]
933  %iv.next = add i32 %iv, 2
934  %iv.2.next = add i32 %iv.2, 2
935  %gep = getelementptr inbounds i32, ptr %dst, i32 %iv
936  store i32 %iv.2.next, ptr %gep, align 4
937  %ec = icmp eq i32 %iv.next, 128
938  br i1 %ec, label %exit, label %loop
939
940exit:                                        ; preds = %loop
941  ret void
942}
943