xref: /llvm-project/llvm/test/Transforms/LoopVectorize/loop-form.ll (revision 82821254f532c1dbdfd5d985ef7130511efaaa83)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -passes=loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -passes=loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
; Data layout used by every function in this file: little-endian (e), Mach-O
; symbol mangling (m:o), i64 aligned to 64 bits, f80 aligned to 128 bits,
; native integer widths of 8/16/32/64 bits, and a 128-bit stack alignment.
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Bottom-tested loop: for.cond is both the header and the latch, so the only
; exit test (%cmp) runs after the store. The default run expects a <2 x i16>
; vector body followed by a scalar remainder loop. The tail-folding run
; expects the vector body to be split into per-lane predicated stores, with
; middle.block branching on constant true straight to if.end.
7define void @bottom_tested(ptr %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
11; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
12; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
13; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK:       vector.ph:
15; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
16; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
17; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
18; CHECK:       vector.body:
19; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
21; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
22; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP2]]
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0
24; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP4]], align 4
25; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
26; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
27; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
28; CHECK:       middle.block:
29; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
30; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
31; CHECK:       scalar.ph:
32; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
33; CHECK-NEXT:    br label [[FOR_COND:%.*]]
34; CHECK:       for.cond:
35; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
36; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
37; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
38; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
39; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
40; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
41; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP3:![0-9]+]]
42; CHECK:       if.end:
43; CHECK-NEXT:    ret void
44;
45; TAILFOLD-LABEL: @bottom_tested(
46; TAILFOLD-NEXT:  entry:
47; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
48; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
49; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
50; TAILFOLD:       vector.ph:
51; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
52; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
53; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
54; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
55; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
56; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
57; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
58; TAILFOLD:       vector.body:
59; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
60; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
61; TAILFOLD-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
62; TAILFOLD-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
63; TAILFOLD-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
64; TAILFOLD-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
65; TAILFOLD:       pred.store.if:
66; TAILFOLD-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
67; TAILFOLD-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
68; TAILFOLD-NEXT:    store i16 0, ptr [[TMP5]], align 4
69; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
70; TAILFOLD:       pred.store.continue:
71; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
72; TAILFOLD-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
73; TAILFOLD:       pred.store.if1:
74; TAILFOLD-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
75; TAILFOLD-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[TMP7]]
76; TAILFOLD-NEXT:    store i16 0, ptr [[TMP8]], align 4
77; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
78; TAILFOLD:       pred.store.continue2:
79; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
80; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
81; TAILFOLD-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
82; TAILFOLD-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
83; TAILFOLD:       middle.block:
84; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
85; TAILFOLD:       scalar.ph:
86; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
87; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
88; TAILFOLD:       for.cond:
89; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
90; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
91; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
92; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
93; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
94; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
95; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP3:![0-9]+]]
96; TAILFOLD:       if.end:
97; TAILFOLD-NEXT:    ret void
98;
99entry:
100  br label %for.cond
101
102for.cond:
103  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
104  %iprom = sext i32 %i to i64
105  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
106  store i16 0, ptr %b, align 4
107  %inc = add nsw i32 %i, 1
; The exit test compares the pre-increment %i against %n; the assertions above
; compute the trip count as smax(%n, 0) + 1.
108  %cmp = icmp slt i32 %i, %n
109  br i1 %cmp, label %for.cond, label %if.end
110
111if.end:
112  ret void
113}
114
; Top-tested loop: the only exit is taken from the header (for.cond) before the
; store in for.body, and the latch (for.body) branches back unconditionally.
; The default run expects vectorization that always leaves work for the scalar
; loop: N_VEC subtracts a full 2 when the trip count is a multiple of 2, and
; middle.block branches unconditionally to scalar.ph. The tail-folding run
; expects the loop to be left in its original scalar form.
115define void @early_exit(ptr %p, i32 %n) {
116; CHECK-LABEL: @early_exit(
117; CHECK-NEXT:  entry:
118; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
119; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
120; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
121; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
122; CHECK:       vector.ph:
123; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
124; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
125; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
126; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
127; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
128; CHECK:       vector.body:
129; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
130; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
131; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
132; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
133; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
134; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP6]], align 4
135; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
136; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
137; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
138; CHECK:       middle.block:
139; CHECK-NEXT:    br label [[SCALAR_PH]]
140; CHECK:       scalar.ph:
141; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
142; CHECK-NEXT:    br label [[FOR_COND:%.*]]
143; CHECK:       for.cond:
144; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
145; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
146; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
147; CHECK:       for.body:
148; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
149; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
150; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
151; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
152; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
153; CHECK:       if.end:
154; CHECK-NEXT:    ret void
155;
156; TAILFOLD-LABEL: @early_exit(
157; TAILFOLD-NEXT:  entry:
158; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
159; TAILFOLD:       for.cond:
160; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
161; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
162; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
163; TAILFOLD:       for.body:
164; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
165; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
166; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
167; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
168; TAILFOLD-NEXT:    br label [[FOR_COND]]
169; TAILFOLD:       if.end:
170; TAILFOLD-NEXT:    ret void
171;
172entry:
173  br label %for.cond
174
175for.cond:
176  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
177  %cmp = icmp slt i32 %i, %n
; The exit is taken here, in the header, before the store in for.body.
178  br i1 %cmp, label %for.body, label %if.end
179
180for.body:
181  %iprom = sext i32 %i to i64
182  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
183  store i16 0, ptr %b, align 4
184  %inc = add nsw i32 %i, 1
185  br label %for.cond
186
187if.end:
188  ret void
189}
190
; Loop with a constant exit bound (1000) whose only exit is taken from the
; header, and where the value loaded in the header (%l) is used after the loop
; through %res.lcssa. The default run expects the vector loop to stop at index
; 998 and the scalar loop to resume from there. The tail-folding run expects
; the loop to be left in its original scalar form.
191define i32 @early_exit_with_live_out(ptr %ptr) {
192; CHECK-LABEL: @early_exit_with_live_out(
193; CHECK-NEXT:  entry:
194; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
195; CHECK:       vector.ph:
196; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
197; CHECK:       vector.body:
198; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
199; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
200; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP0]]
201; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
202; CHECK-NEXT:    store <2 x i32> splat (i32 10), ptr [[TMP2]], align 4
203; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
204; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
205; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
206; CHECK:       middle.block:
207; CHECK-NEXT:    br label [[SCALAR_PH]]
208; CHECK:       scalar.ph:
209; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
210; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
211; CHECK:       loop.header:
212; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
213; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[IV]]
214; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
215; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
216; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
217; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
218; CHECK:       loop.latch:
219; CHECK-NEXT:    store i32 10, ptr [[GEP]], align 4
220; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
221; CHECK:       exit:
222; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ]
223; CHECK-NEXT:    ret i32 [[RES_LCSSA]]
224;
225; TAILFOLD-LABEL: @early_exit_with_live_out(
226; TAILFOLD-NEXT:  entry:
227; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
228; TAILFOLD:       loop.header:
229; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
230; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[IV]]
231; TAILFOLD-NEXT:    [[L:%.*]] = load i32, ptr [[GEP]], align 4
232; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
233; TAILFOLD-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
234; TAILFOLD-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
235; TAILFOLD:       loop.latch:
236; TAILFOLD-NEXT:    store i32 10, ptr [[GEP]], align 4
237; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
238; TAILFOLD:       exit:
239; TAILFOLD-NEXT:    [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ]
240; TAILFOLD-NEXT:    ret i32 [[RES_LCSSA]]
241;
242entry:
243  br label %loop.header
244
245loop.header:
246  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
247  %gep = getelementptr i32, ptr %ptr, i64 %iv
248  %l = load i32, ptr %gep
249  %iv.next = add nuw nsw i64 %iv, 1
250  %ec = icmp eq i64 %iv.next, 1000
251  br i1 %ec, label %exit, label %loop.latch
252
253loop.latch:
; The store sits after the exit test, so it is skipped on the exiting iteration.
254  store i32 10, ptr %gep
255  br label %loop.header
256
257exit:
; %l is the live-out: the value loaded on the iteration that took the exit.
258  %res.lcssa = phi i32 [ %l, %loop.header ]
259  ret i32 %res.lcssa
260}
261
262; Same loop as @early_exit, but marked optsize to prevent the use of
263; a scalar epilogue. The loop cannot be vectorized in either run.
; The loop exits from the header (for.cond) and the function carries the
; optsize attribute. Both sets of assertions below expect the original scalar
; loop to come out unchanged, with no vector blocks.
264define void @optsize(ptr %p, i32 %n) optsize {
265; CHECK-LABEL: @optsize(
266; CHECK-NEXT:  entry:
267; CHECK-NEXT:    br label [[FOR_COND:%.*]]
268; CHECK:       for.cond:
269; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
270; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
271; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
272; CHECK:       for.body:
273; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
274; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
275; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
276; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
277; CHECK-NEXT:    br label [[FOR_COND]]
278; CHECK:       if.end:
279; CHECK-NEXT:    ret void
280;
281; TAILFOLD-LABEL: @optsize(
282; TAILFOLD-NEXT:  entry:
283; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
284; TAILFOLD:       for.cond:
285; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
286; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
287; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
288; TAILFOLD:       for.body:
289; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
290; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
291; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
292; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
293; TAILFOLD-NEXT:    br label [[FOR_COND]]
294; TAILFOLD:       if.end:
295; TAILFOLD-NEXT:    ret void
296;
297entry:
298  br label %for.cond
299
300for.cond:
301  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
302  %cmp = icmp slt i32 %i, %n
303  br i1 %cmp, label %for.body, label %if.end
304
305for.body:
306  %iprom = sext i32 %i to i64
307  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
308  store i16 0, ptr %b, align 4
309  %inc = add nsw i32 %i, 1
310  br label %for.cond
311
312if.end:
313  ret void
314}
315
316
317; multiple exit - no values inside the loop used outside
; Two exiting blocks (for.cond on %cmp, for.body on %cmp2) share one exit
; block, if.end. The default run expects vectorization with a trip count of
; umin(smax(%n, 0), 2096) + 1 and an unconditional branch from middle.block to
; the scalar loop. The tail-folding run expects the loop to be left in its
; original scalar form.
318define void @multiple_unique_exit(ptr %p, i32 %n) {
319; CHECK-LABEL: @multiple_unique_exit(
320; CHECK-NEXT:  entry:
321; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
322; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
323; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
324; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
325; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
326; CHECK:       vector.ph:
327; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
328; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
329; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
330; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
331; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
332; CHECK:       vector.body:
333; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
334; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
335; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
336; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
337; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
338; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP6]], align 4
339; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
340; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
341; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
342; CHECK:       middle.block:
343; CHECK-NEXT:    br label [[SCALAR_PH]]
344; CHECK:       scalar.ph:
345; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
346; CHECK-NEXT:    br label [[FOR_COND:%.*]]
347; CHECK:       for.cond:
348; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
349; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
350; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
351; CHECK:       for.body:
352; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
353; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
354; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
355; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
356; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
357; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
358; CHECK:       if.end:
359; CHECK-NEXT:    ret void
360;
361; TAILFOLD-LABEL: @multiple_unique_exit(
362; TAILFOLD-NEXT:  entry:
363; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
364; TAILFOLD:       for.cond:
365; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
366; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
367; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
368; TAILFOLD:       for.body:
369; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
370; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
371; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
372; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
373; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
374; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
375; TAILFOLD:       if.end:
376; TAILFOLD-NEXT:    ret void
377;
378entry:
379  br label %for.cond
380
381for.cond:
382  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
383  %cmp = icmp slt i32 %i, %n
; First exit: taken from the header when %i is no longer below %n.
384  br i1 %cmp, label %for.body, label %if.end
385
386for.body:
387  %iprom = sext i32 %i to i64
388  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
389  store i16 0, ptr %b, align 4
390  %inc = add nsw i32 %i, 1
391  %cmp2 = icmp slt i32 %i, 2096
; Second exit: taken from the latch when %i is no longer below 2096.
392  br i1 %cmp2, label %for.cond, label %if.end
393
394if.end:
395  ret void
396}
397
398; multiple exit - with an lcssa phi
; Same two-exit loop shape as @multiple_unique_exit (exits from for.cond and
; for.body into the shared block if.end), but %i is returned after the loop.
; The default run expects vectorization plus an LCSSA phi in if.end that takes
; %i from both exiting blocks of the scalar loop. The tail-folding run expects
; the scalar loop unvectorized, with only that LCSSA phi added.
399define i32 @multiple_unique_exit2(ptr %p, i32 %n) {
400; CHECK-LABEL: @multiple_unique_exit2(
401; CHECK-NEXT:  entry:
402; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
403; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
404; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
405; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
406; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
407; CHECK:       vector.ph:
408; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
409; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
410; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
411; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
412; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
413; CHECK:       vector.body:
414; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
415; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
416; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
417; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
418; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
419; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP6]], align 4
420; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
421; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
422; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
423; CHECK:       middle.block:
424; CHECK-NEXT:    br label [[SCALAR_PH]]
425; CHECK:       scalar.ph:
426; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
427; CHECK-NEXT:    br label [[FOR_COND:%.*]]
428; CHECK:       for.cond:
429; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
430; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
431; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
432; CHECK:       for.body:
433; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
434; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
435; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
436; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
437; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
438; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
439; CHECK:       if.end:
440; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
441; CHECK-NEXT:    ret i32 [[I_LCSSA]]
442;
443; TAILFOLD-LABEL: @multiple_unique_exit2(
444; TAILFOLD-NEXT:  entry:
445; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
446; TAILFOLD:       for.cond:
447; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
448; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
449; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
450; TAILFOLD:       for.body:
451; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
452; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
453; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
454; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
455; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
456; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
457; TAILFOLD:       if.end:
458; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
459; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
460;
461entry:
462  br label %for.cond
463
464for.cond:
465  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
466  %cmp = icmp slt i32 %i, %n
467  br i1 %cmp, label %for.body, label %if.end
468
469for.body:
470  %iprom = sext i32 %i to i64
471  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
472  store i16 0, ptr %b, align 4
473  %inc = add nsw i32 %i, 1
474  %cmp2 = icmp slt i32 %i, 2096
475  br i1 %cmp2, label %for.cond, label %if.end
476
477if.end:
; The input uses %i directly outside the loop; the expected output routes it
; through an LCSSA phi.
478  ret i32 %i
479}
480
481; multiple exit - with a non-LCSSA phi (constants selected by exiting block)
; Two exiting blocks (for.cond and for.body) share the exit block if.end, which
; holds a phi of constants: 0 when the exit came from for.cond, 1 when it came
; from for.body. The default run expects vectorization with that phi kept on
; the scalar loop's exits. The tail-folding run expects the loop to be left in
; its original scalar form.
482define i32 @multiple_unique_exit3(ptr %p, i32 %n) {
483; CHECK-LABEL: @multiple_unique_exit3(
484; CHECK-NEXT:  entry:
485; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
486; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
487; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
488; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
489; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
490; CHECK:       vector.ph:
491; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
492; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
493; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
494; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
495; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
496; CHECK:       vector.body:
497; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
498; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
499; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
500; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
501; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
502; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP6]], align 4
503; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
504; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
505; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
506; CHECK:       middle.block:
507; CHECK-NEXT:    br label [[SCALAR_PH]]
508; CHECK:       scalar.ph:
509; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
510; CHECK-NEXT:    br label [[FOR_COND:%.*]]
511; CHECK:       for.cond:
512; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
513; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
514; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
515; CHECK:       for.body:
516; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
517; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
518; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
519; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
520; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
521; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP13:![0-9]+]]
522; CHECK:       if.end:
523; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
524; CHECK-NEXT:    ret i32 [[EXIT]]
525;
526; TAILFOLD-LABEL: @multiple_unique_exit3(
527; TAILFOLD-NEXT:  entry:
528; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
529; TAILFOLD:       for.cond:
530; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
531; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
532; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
533; TAILFOLD:       for.body:
534; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
535; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
536; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
537; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
538; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
539; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
540; TAILFOLD:       if.end:
541; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
542; TAILFOLD-NEXT:    ret i32 [[EXIT]]
543;
544entry:
545  br label %for.cond
546
547for.cond:
548  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
549  %cmp = icmp slt i32 %i, %n
550  br i1 %cmp, label %for.body, label %if.end
551
552for.body:
553  %iprom = sext i32 %i to i64
554  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
555  store i16 0, ptr %b, align 4
556  %inc = add nsw i32 %i, 1
557  %cmp2 = icmp slt i32 %i, 2096
558  br i1 %cmp2, label %for.cond, label %if.end
559
560if.end:
; The incoming values are constants, not loop-defined values: the result only
; records which exiting block was taken.
561  %exit = phi i32 [0, %for.cond], [1, %for.body]
562  ret i32 %exit
563}
564
565; multiple exits w/distinct target blocks
; Two exiting blocks that branch to different exit blocks: for.cond exits to
; if.end (returns 0) and for.body exits to if.end2 (returns 1). The default
; run expects vectorization with an unconditional branch from middle.block to
; the scalar loop, which keeps both exits. The tail-folding run expects the
; loop to be left in its original scalar form.
566define i32 @multiple_exit_blocks(ptr %p, i32 %n) {
567; CHECK-LABEL: @multiple_exit_blocks(
568; CHECK-NEXT:  entry:
569; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
570; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
571; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
572; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
573; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
574; CHECK:       vector.ph:
575; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
576; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
577; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
578; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
579; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
580; CHECK:       vector.body:
581; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
582; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
583; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
584; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
585; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
586; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP6]], align 4
587; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
588; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
589; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
590; CHECK:       middle.block:
591; CHECK-NEXT:    br label [[SCALAR_PH]]
592; CHECK:       scalar.ph:
593; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
594; CHECK-NEXT:    br label [[FOR_COND:%.*]]
595; CHECK:       for.cond:
596; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
597; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
598; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
599; CHECK:       for.body:
600; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
601; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
602; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
603; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
604; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
605; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
606; CHECK:       if.end:
607; CHECK-NEXT:    ret i32 0
608; CHECK:       if.end2:
609; CHECK-NEXT:    ret i32 1
610;
611; TAILFOLD-LABEL: @multiple_exit_blocks(
612; TAILFOLD-NEXT:  entry:
613; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
614; TAILFOLD:       for.cond:
615; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
616; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
617; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
618; TAILFOLD:       for.body:
619; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
620; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
621; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
622; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
623; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
624; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
625; TAILFOLD:       if.end:
626; TAILFOLD-NEXT:    ret i32 0
627; TAILFOLD:       if.end2:
628; TAILFOLD-NEXT:    ret i32 1
629;
630entry:
631  br label %for.cond
632
633for.cond:
634  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
635  %cmp = icmp slt i32 %i, %n
636  br i1 %cmp, label %for.body, label %if.end
637
638for.body:
639  %iprom = sext i32 %i to i64
640  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
641  store i16 0, ptr %b, align 4
642  %inc = add nsw i32 %i, 1
643  %cmp2 = icmp slt i32 %i, 2096
; The latch exit targets if.end2, a different block from the header exit.
644  br i1 %cmp2, label %for.cond, label %if.end2
645
646if.end:
647  ret i32 0
648
649if.end2:
650  ret i32 1
651}
652
653; LCSSA, common value each exit
; Loop with two exiting blocks (for.cond and for.body), each leaving to its
; own exit block. Both exit blocks return %i, so the expected output carries
; one LCSSA phi of %i per exit block.
; Expected output, default run: vectorized with VF=2; the vector trip count is
; reduced so that at least one iteration remains, and middle.block branches
; unconditionally to scalar.ph so the scalar loop takes the actual exit.
; Expected output, predicate-dont-vectorize run: the loop is left scalar.
654define i32 @multiple_exit_blocks2(ptr %p, i32 %n) {
655; CHECK-LABEL: @multiple_exit_blocks2(
656; CHECK-NEXT:  entry:
657; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
658; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
659; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
660; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
661; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
662; CHECK:       vector.ph:
663; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
664; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
665; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
666; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
667; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
668; CHECK:       vector.body:
669; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
670; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
671; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
672; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
673; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
674; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP6]], align 4
675; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
676; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
677; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
678; CHECK:       middle.block:
679; CHECK-NEXT:    br label [[SCALAR_PH]]
680; CHECK:       scalar.ph:
681; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
682; CHECK-NEXT:    br label [[FOR_COND:%.*]]
683; CHECK:       for.cond:
684; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
685; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
686; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
687; CHECK:       for.body:
688; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
689; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
690; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
691; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
692; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
693; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
694; CHECK:       if.end:
695; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
696; CHECK-NEXT:    ret i32 [[I_LCSSA]]
697; CHECK:       if.end2:
698; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
699; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
700;
701; TAILFOLD-LABEL: @multiple_exit_blocks2(
702; TAILFOLD-NEXT:  entry:
703; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
704; TAILFOLD:       for.cond:
705; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
706; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
707; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
708; TAILFOLD:       for.body:
709; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
710; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
711; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
712; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
713; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
714; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
715; TAILFOLD:       if.end:
716; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
717; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
718; TAILFOLD:       if.end2:
719; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
720; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
721;
722entry:
723  br label %for.cond
724
725for.cond:
726  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
727  %cmp = icmp slt i32 %i, %n
728  br i1 %cmp, label %for.body, label %if.end  ; first exit: taken once %i >= %n (signed)
729
730for.body:
731  %iprom = sext i32 %i to i64
732  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
733  store i16 0, ptr %b, align 4  ; zero the i16 element at index %i
734  %inc = add nsw i32 %i, 1
735  %cmp2 = icmp slt i32 %i, 2096  ; compares the pre-increment %i, not %inc
736  br i1 %cmp2, label %for.cond, label %if.end2  ; backedge, or second exit once %i >= 2096
737
738if.end:
739  ret i32 %i  ; same value as the other exit block returns
740
741if.end2:
742  ret i32 %i
743}
744
745; LCSSA, distinct value each exit
; Same loop shape as @multiple_exit_blocks2 (two exiting blocks, two exit
; blocks), but the exit blocks return different values: if.end returns %i and
; if.end2 returns %inc. The expected output therefore has an LCSSA phi of %i in
; if.end and an LCSSA phi of %inc in if.end2.
; Expected output, default run: vectorized with VF=2, middle.block branches
; unconditionally to scalar.ph.
; Expected output, predicate-dont-vectorize run: the loop is left scalar.
746define i32 @multiple_exit_blocks3(ptr %p, i32 %n) {
747; CHECK-LABEL: @multiple_exit_blocks3(
748; CHECK-NEXT:  entry:
749; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
750; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
751; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
752; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
753; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
754; CHECK:       vector.ph:
755; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
756; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
757; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
758; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
759; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
760; CHECK:       vector.body:
761; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
762; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
763; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
764; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
765; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
766; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP6]], align 4
767; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
768; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
769; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
770; CHECK:       middle.block:
771; CHECK-NEXT:    br label [[SCALAR_PH]]
772; CHECK:       scalar.ph:
773; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
774; CHECK-NEXT:    br label [[FOR_COND:%.*]]
775; CHECK:       for.cond:
776; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
777; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
778; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
779; CHECK:       for.body:
780; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
781; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
782; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
783; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
784; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
785; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP19:![0-9]+]]
786; CHECK:       if.end:
787; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
788; CHECK-NEXT:    ret i32 [[I_LCSSA]]
789; CHECK:       if.end2:
790; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
791; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
792;
793; TAILFOLD-LABEL: @multiple_exit_blocks3(
794; TAILFOLD-NEXT:  entry:
795; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
796; TAILFOLD:       for.cond:
797; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
798; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
799; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
800; TAILFOLD:       for.body:
801; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
802; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
803; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
804; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
805; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
806; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
807; TAILFOLD:       if.end:
808; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
809; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
810; TAILFOLD:       if.end2:
811; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
812; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
813;
814entry:
815  br label %for.cond
816
817for.cond:
818  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]  ; induction variable, starts at 0
819  %cmp = icmp slt i32 %i, %n
820  br i1 %cmp, label %for.body, label %if.end  ; first exit: taken once %i >= %n (signed)
821
822for.body:
823  %iprom = sext i32 %i to i64
824  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
825  store i16 0, ptr %b, align 4  ; zero the i16 element at index %i
826  %inc = add nsw i32 %i, 1
827  %cmp2 = icmp slt i32 %i, 2096  ; compares the pre-increment %i, not %inc
828  br i1 %cmp2, label %for.cond, label %if.end2  ; backedge, or second exit once %i >= 2096
829
830if.end:
831  ret i32 %i  ; live-out of the first exit: the induction variable
832
833if.end2:
834  ret i32 %inc  ; live-out of the second exit: the incremented value
835}
836
837; unique exit case but with a switch as two edges between the same pair of
838; blocks is an often missed edge case
; Single-block loop whose terminator is a switch: the default destination is
; the backedge, and two distinct case values both branch to the same exit
; block %if.end. That gives two CFG edges between the same pair of blocks, so
; the LCSSA phi in if.end has two incoming entries from for.cond.
; The %n argument is not used by the body.
; Expected output: the loop is left scalar in both runs.
839define i32 @multiple_exit_switch(ptr %p, i32 %n) {
840; CHECK-LABEL: @multiple_exit_switch(
841; CHECK-NEXT:  entry:
842; CHECK-NEXT:    br label [[FOR_COND:%.*]]
843; CHECK:       for.cond:
844; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
845; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
846; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
847; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
848; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
849; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
850; CHECK-NEXT:      i32 2096, label [[IF_END:%.*]]
851; CHECK-NEXT:      i32 2097, label [[IF_END]]
852; CHECK-NEXT:    ]
853; CHECK:       if.end:
854; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
855; CHECK-NEXT:    ret i32 [[I_LCSSA]]
856;
857; TAILFOLD-LABEL: @multiple_exit_switch(
858; TAILFOLD-NEXT:  entry:
859; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
860; TAILFOLD:       for.cond:
861; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
862; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
863; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
864; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
865; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
866; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
867; TAILFOLD-NEXT:      i32 2096, label [[IF_END:%.*]]
868; TAILFOLD-NEXT:      i32 2097, label [[IF_END]]
869; TAILFOLD-NEXT:    ]
870; TAILFOLD:       if.end:
871; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
872; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
873;
874entry:
875  br label %for.cond
876
877for.cond:
878  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]  ; induction variable, starts at 0
879  %iprom = sext i32 %i to i64
880  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
881  store i16 0, ptr %b, align 4  ; zero the i16 element at index %i
882  %inc = add nsw i32 %i, 1
883  switch i32 %i, label %for.cond [  ; default destination is the backedge
884  i32 2096, label %if.end  ; first exiting edge
885  i32 2097, label %if.end  ; second exiting edge, same destination block
886  ]
887
888if.end:
889  ret i32 %i
890}
891
892; multiple exit case but with a switch as multiple exiting edges from
893; a single block is a commonly missed edge case
; Single-block loop whose terminator is a switch: the default destination is
; the backedge, and the two case values branch to two different exit blocks
; (%if.end returning 0, %if.end2 returning 1). One block thus has several
; exiting edges to distinct exits.
; The %n argument is not used by the body.
; Expected output: the loop is left scalar in both runs.
894define i32 @multiple_exit_switch2(ptr %p, i32 %n) {
895; CHECK-LABEL: @multiple_exit_switch2(
896; CHECK-NEXT:  entry:
897; CHECK-NEXT:    br label [[FOR_COND:%.*]]
898; CHECK:       for.cond:
899; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
900; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
901; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
902; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
903; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
904; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
905; CHECK-NEXT:      i32 2096, label [[IF_END:%.*]]
906; CHECK-NEXT:      i32 2097, label [[IF_END2:%.*]]
907; CHECK-NEXT:    ]
908; CHECK:       if.end:
909; CHECK-NEXT:    ret i32 0
910; CHECK:       if.end2:
911; CHECK-NEXT:    ret i32 1
912;
913; TAILFOLD-LABEL: @multiple_exit_switch2(
914; TAILFOLD-NEXT:  entry:
915; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
916; TAILFOLD:       for.cond:
917; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
918; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
919; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
920; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
921; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
922; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
923; TAILFOLD-NEXT:      i32 2096, label [[IF_END:%.*]]
924; TAILFOLD-NEXT:      i32 2097, label [[IF_END2:%.*]]
925; TAILFOLD-NEXT:    ]
926; TAILFOLD:       if.end:
927; TAILFOLD-NEXT:    ret i32 0
928; TAILFOLD:       if.end2:
929; TAILFOLD-NEXT:    ret i32 1
930;
931entry:
932  br label %for.cond
933
934for.cond:
935  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]  ; induction variable, starts at 0
936  %iprom = sext i32 %i to i64
937  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
938  store i16 0, ptr %b, align 4  ; zero the i16 element at index %i
939  %inc = add nsw i32 %i, 1
940  switch i32 %i, label %for.cond [  ; default destination is the backedge
941  i32 2096, label %if.end  ; exiting edge to the first exit block
942  i32 2097, label %if.end2  ; exiting edge to the second exit block
943  ]
944
945if.end:
946  ret i32 0
947
948if.end2:
949  ret i32 1
950}
951
; Loop in which two blocks (for.body and for.second) can continue to the next
; iteration; both do so through the shared block %for.body.backedge, which is
; the only predecessor of the header inside the loop. The store and the only
; exit (to %for.end) live in for.second, which is reached only when
; %inc >= 16.
; Expected output: the loop is left scalar in both runs.
952define i32 @multiple_latch1(ptr %p) {
953; CHECK-LABEL: @multiple_latch1(
954; CHECK-NEXT:  entry:
955; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
956; CHECK:       for.body:
957; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
958; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
959; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
960; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
961; CHECK:       for.second:
962; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
963; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
964; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
965; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
966; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
967; CHECK:       for.body.backedge:
968; CHECK-NEXT:    br label [[FOR_BODY]]
969; CHECK:       for.end:
970; CHECK-NEXT:    ret i32 0
971;
972; TAILFOLD-LABEL: @multiple_latch1(
973; TAILFOLD-NEXT:  entry:
974; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
975; TAILFOLD:       for.body:
976; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
977; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
978; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
979; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
980; TAILFOLD:       for.second:
981; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
982; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
983; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
984; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
985; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
986; TAILFOLD:       for.body.backedge:
987; TAILFOLD-NEXT:    br label [[FOR_BODY]]
988; TAILFOLD:       for.end:
989; TAILFOLD-NEXT:    ret i32 0
990;
991entry:
992  br label %for.body
993
994for.body:
995  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]  ; induction variable, starts at 0
996  %inc = add nsw i32 %i.02, 1
997  %cmp = icmp slt i32 %inc, 16
998  br i1 %cmp, label %for.body.backedge, label %for.second  ; continue directly while %inc < 16
999
1000for.second:
1001  %iprom = sext i32 %i.02 to i64
1002  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
1003  store i16 0, ptr %b, align 4  ; zero the i16 element at index %i.02
1004  %cmps = icmp sgt i32 %inc, 16
1005  br i1 %cmps, label %for.body.backedge, label %for.end  ; continue if %inc > 16, otherwise exit
1006
1007for.body.backedge:
1008  br label %for.body  ; single backedge shared by both continuing blocks
1009
1010for.end:
1011  ret i32 0
1012}
1013
1014
1015; two back branches - loop simplify will convert this to the same form
1016; as the previous test before the vectorizer sees it, but show that.
; Loop whose header %for.body has two backedges in the input IR: one from
; for.body itself and one from for.second (the header phi has an incoming
; value for each). The expected output shows both backedges routed through a
; new %for.body.backedge block that is not present in the input.
; Expected output: the loop is left scalar in both runs.
1017define i32 @multiple_latch2(ptr %p) {
1018; CHECK-LABEL: @multiple_latch2(
1019; CHECK-NEXT:  entry:
1020; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1021; CHECK:       for.body:
1022; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
1023; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
1024; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
1025; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
1026; CHECK:       for.body.backedge:
1027; CHECK-NEXT:    br label [[FOR_BODY]]
1028; CHECK:       for.second:
1029; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
1030; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
1031; CHECK-NEXT:    store i16 0, ptr [[B]], align 4
1032; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
1033; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
1034; CHECK:       for.end:
1035; CHECK-NEXT:    ret i32 0
1036;
1037; TAILFOLD-LABEL: @multiple_latch2(
1038; TAILFOLD-NEXT:  entry:
1039; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
1040; TAILFOLD:       for.body:
1041; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
1042; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
1043; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
1044; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
1045; TAILFOLD:       for.body.backedge:
1046; TAILFOLD-NEXT:    br label [[FOR_BODY]]
1047; TAILFOLD:       for.second:
1048; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
1049; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[IPROM]]
1050; TAILFOLD-NEXT:    store i16 0, ptr [[B]], align 4
1051; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
1052; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
1053; TAILFOLD:       for.end:
1054; TAILFOLD-NEXT:    ret i32 0
1055;
1056entry:
1057  br label %for.body
1058
1059for.body:
1060  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]  ; one incoming value per backedge
1061  %inc = add nsw i32 %i.02, 1
1062  %cmp = icmp slt i32 %inc, 16
1063  br i1 %cmp, label %for.body, label %for.second  ; first backedge: self-loop while %inc < 16
1064
1065for.second:
1066  %iprom = sext i32 %i.02 to i64
1067  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
1068  store i16 0, ptr %b, align 4  ; zero the i16 element at index %i.02
1069  %cmps = icmp sgt i32 %inc, 16
1070  br i1 %cmps, label %for.body, label %for.end  ; second backedge if %inc > 16, otherwise exit
1071
1072for.end:
1073  ret i32 0
1074}
1075
1076
1077; Check interaction between block predication and early exits.  We need the
1078; condition on the early exit to remain dead (i.e. not be used when forming
1079; the predicate mask).
; Loop that exits from its header (when %iv == 200), has an unconditional
; latch, and performs a conditional store in %then guarded by a floating-point
; compare of the loaded element.
; Expected output, default run: vectorized with VF=2. The store is emitted as
; per-lane predicated blocks (pred.store.if / pred.store.continue) whose lane
; conditions are extracted from the negated fcmp result only; the header exit
; compare does not feed that mask. The vector loop runs until the index
; reaches 200 and middle.block branches unconditionally to scalar.ph.
; Expected output, predicate-dont-vectorize run: the loop is left scalar.
1080define void @scalar_predication(ptr %addr) {
1081; CHECK-LABEL: @scalar_predication(
1082; CHECK-NEXT:  entry:
1083; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1084; CHECK:       vector.ph:
1085; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1086; CHECK:       vector.body:
1087; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
1088; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1089; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, ptr [[ADDR:%.*]], i64 [[TMP0]]
1090; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 0
1091; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4
1092; CHECK-NEXT:    [[TMP3:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
1093; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
1094; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
1095; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1096; CHECK:       pred.store.if:
1097; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP0]]
1098; CHECK-NEXT:    store float 1.000000e+01, ptr [[TMP6]], align 4
1099; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
1100; CHECK:       pred.store.continue:
1101; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
1102; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
1103; CHECK:       pred.store.if1:
1104; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
1105; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP8]]
1106; CHECK-NEXT:    store float 1.000000e+01, ptr [[TMP9]], align 4
1107; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
1108; CHECK:       pred.store.continue2:
1109; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1110; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1111; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1112; CHECK:       middle.block:
1113; CHECK-NEXT:    br label [[SCALAR_PH]]
1114; CHECK:       scalar.ph:
1115; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1116; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1117; CHECK:       loop.header:
1118; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1119; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[IV]]
1120; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1121; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1122; CHECK:       loop.body:
1123; CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[GEP]], align 4
1124; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
1125; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1126; CHECK:       then:
1127; CHECK-NEXT:    store float 1.000000e+01, ptr [[GEP]], align 4
1128; CHECK-NEXT:    br label [[LOOP_LATCH]]
1129; CHECK:       loop.latch:
1130; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1131; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
1132; CHECK:       exit:
1133; CHECK-NEXT:    ret void
1134;
1135; TAILFOLD-LABEL: @scalar_predication(
1136; TAILFOLD-NEXT:  entry:
1137; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1138; TAILFOLD:       loop.header:
1139; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1140; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, ptr [[ADDR:%.*]], i64 [[IV]]
1141; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1142; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1143; TAILFOLD:       loop.body:
1144; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
1145; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
1146; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1147; TAILFOLD:       then:
1148; TAILFOLD-NEXT:    store float 1.000000e+01, ptr [[GEP]], align 4
1149; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
1150; TAILFOLD:       loop.latch:
1151; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1152; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1153; TAILFOLD:       exit:
1154; TAILFOLD-NEXT:    ret void
1155;
1156entry:
1157  br label %loop.header
1158
1159loop.header:
1160  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]  ; induction variable, starts at 0
1161  %gep = getelementptr float, ptr %addr, i64 %iv
1162  %exitcond.not = icmp eq i64 %iv, 200
1163  br i1 %exitcond.not, label %exit, label %loop.body  ; the only exit, taken from the header
1164
1165loop.body:
1166  %0 = load float, ptr %gep, align 4
1167  %pred = fcmp oeq float %0, 0.0
1168  br i1 %pred, label %loop.latch, label %then  ; skip the store when the element equals 0.0
1169
1170then:
1171  store float 10.0, ptr %gep, align 4  ; conditional store that needs predication when vectorized
1172  br label %loop.latch
1173
1174loop.latch:
1175  %iv.next = add nuw nsw i64 %iv, 1
1176  br label %loop.header  ; unconditional backedge: the latch is not an exiting block
1177
1178exit:
1179  ret void
1180}
1181
; Integer add reduction in a loop with two exiting blocks that share one exit
; block: the header exits when %iv == 200 and the latch exits when %iv == 400.
; The exit phi merges the constant 0 (header edge) with %accum.next (latch
; edge). Since %iv starts at 0 and steps by 1, the header exit is the one
; reached first.
; Expected output, default run: vectorized with VF=2 using a <2 x i32>
; accumulator, reduced with llvm.vector.reduce.add in middle.block, which then
; branches unconditionally to scalar.ph; the scalar loop resumes at index 200
; with the merged reduction value.
; Expected output, predicate-dont-vectorize run: the loop is left scalar.
1182define i32 @me_reduction(ptr %addr) {
1183; CHECK-LABEL: @me_reduction(
1184; CHECK-NEXT:  entry:
1185; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1186; CHECK:       vector.ph:
1187; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1188; CHECK:       vector.body:
1189; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1190; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
1191; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1192; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[ADDR:%.*]], i64 [[TMP0]]
1193; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
1194; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
1195; CHECK-NEXT:    [[TMP3]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
1196; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1197; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1198; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1199; CHECK:       middle.block:
1200; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP3]])
1201; CHECK-NEXT:    br label [[SCALAR_PH]]
1202; CHECK:       scalar.ph:
1203; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1204; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1205; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1206; CHECK:       loop.header:
1207; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1208; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1209; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[ADDR]], i64 [[IV]]
1210; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1211; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1212; CHECK:       loop.latch:
1213; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[GEP]], align 4
1214; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP6]]
1215; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1216; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1217; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]]
1218; CHECK:       exit:
1219; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1220; CHECK-NEXT:    ret i32 [[LCSSA]]
1221;
1222; TAILFOLD-LABEL: @me_reduction(
1223; TAILFOLD-NEXT:  entry:
1224; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1225; TAILFOLD:       loop.header:
1226; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1227; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1228; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[ADDR:%.*]], i64 [[IV]]
1229; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1230; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1231; TAILFOLD:       loop.latch:
1232; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
1233; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1234; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1235; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1236; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1237; TAILFOLD:       exit:
1238; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1239; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1240;
1241entry:
1242  br label %loop.header
1243
1244loop.header:
1245  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]  ; induction variable, starts at 0
1246  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]  ; running sum, starts at 0
1247  %gep = getelementptr i32, ptr %addr, i64 %iv
1248  %exitcond.not = icmp eq i64 %iv, 200
1249  br i1 %exitcond.not, label %exit, label %loop.latch  ; first exit, from the header
1250
1251loop.latch:
1252  %0 = load i32, ptr %gep, align 4
1253  %accum.next = add i32 %accum, %0  ; reduction update
1254  %iv.next = add nuw nsw i64 %iv, 1
1255  %exitcond2.not = icmp eq i64 %iv, 400  ; compares the pre-increment %iv
1256  br i1 %exitcond2.not, label %exit, label %loop.header  ; second exit, from the latch
1257
1258exit:
1259  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]  ; result depends on which exit was taken
1260  ret i32 %lcssa
1261}
1262
1263; TODO: The current definition of reduction is too strict, we can vectorize
1264; this.  There's an analogous single exit case where we extract the N-1
1265; value of the reduction that we can also handle.  If we fix the latter, the
1266; multiple exit case probably falls out.
; Integer add reduction in a loop whose only exit is from the header (when
; %iv == 200); the latch branches back unconditionally. The value returned is
; the header phi %accum, i.e. the sum before the current iteration's update,
; rather than %accum.next.
; Expected output: the loop is left scalar in both runs.
1267define i32 @me_reduction2(ptr %addr) {
1268; CHECK-LABEL: @me_reduction2(
1269; CHECK-NEXT:  entry:
1270; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1271; CHECK:       loop.header:
1272; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1273; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1274; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[ADDR:%.*]], i64 [[IV]]
1275; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1276; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1277; CHECK:       loop.latch:
1278; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
1279; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1280; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1281; CHECK-NEXT:    br label [[LOOP_HEADER]]
1282; CHECK:       exit:
1283; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1284; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1285;
1286; TAILFOLD-LABEL: @me_reduction2(
1287; TAILFOLD-NEXT:  entry:
1288; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1289; TAILFOLD:       loop.header:
1290; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1291; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1292; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[ADDR:%.*]], i64 [[IV]]
1293; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1294; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1295; TAILFOLD:       loop.latch:
1296; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
1297; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1298; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1299; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1300; TAILFOLD:       exit:
1301; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1302; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1303;
1304entry:
1305  br label %loop.header
1306
1307loop.header:
1308  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]  ; induction variable, starts at 0
1309  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]  ; running sum, starts at 0
1310  %gep = getelementptr i32, ptr %addr, i64 %iv
1311  %exitcond.not = icmp eq i64 %iv, 200
1312  br i1 %exitcond.not, label %exit, label %loop.latch  ; the only exit, taken from the header
1313
1314loop.latch:
1315  %0 = load i32, ptr %gep, align 4
1316  %accum.next = add i32 %accum, %0  ; reduction update
1317  %iv.next = add nuw nsw i64 %iv, 1
1318  br label %loop.header  ; unconditional backedge: the latch is not an exiting block
1319
1320exit:
1321  ret i32 %accum  ; returns the header phi, not %accum.next
1322}
1323
1324