xref: /llvm-project/llvm/test/Transforms/LoopVectorize/loop-form.ll (revision 6d3e3ae8a9ca10e063d541a959f4fe4cdb003dba)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; @bottom_tested: single-block loop that stores 0 to p[i] and only then tests
; i < n (signed), so the exit test sits at the bottom of the loop.
; Default-prefix assertions: a vector.body storing <2 x i16>, with middle.block
; branching to if.end when the trip count equals n.vec and to scalar.ph
; otherwise.
; Tail-folding assertions: per-lane predicated scalar stores (pred.store.if
; blocks) and a middle.block whose branch condition is the constant true.
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
11; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
12; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
13; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK:       vector.ph:
15; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
16; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
17; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
18; CHECK:       vector.body:
19; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
21; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
22; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
24; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
25; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
26; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
27; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
28; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
29; CHECK:       middle.block:
30; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
31; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
32; CHECK:       scalar.ph:
33; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
34; CHECK-NEXT:    br label [[FOR_COND:%.*]]
35; CHECK:       for.cond:
36; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
37; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
38; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
39; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
40; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
41; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
42; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
43; CHECK:       if.end:
44; CHECK-NEXT:    ret void
45;
46; TAILFOLD-LABEL: @bottom_tested(
47; TAILFOLD-NEXT:  entry:
48; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
50; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
51; TAILFOLD:       vector.ph:
52; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
53; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
54; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
55; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
56; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
57; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
58; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
59; TAILFOLD:       vector.body:
60; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
61; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
62; TAILFOLD-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
63; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 1
64; TAILFOLD-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
65; TAILFOLD-NEXT:    [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
66; TAILFOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
67; TAILFOLD-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
68; TAILFOLD:       pred.store.if:
69; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
70; TAILFOLD-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
71; TAILFOLD-NEXT:    store i16 0, i16* [[TMP7]], align 4
72; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
73; TAILFOLD:       pred.store.continue:
74; TAILFOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
75; TAILFOLD-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
76; TAILFOLD:       pred.store.if1:
77; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
78; TAILFOLD-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]]
79; TAILFOLD-NEXT:    store i16 0, i16* [[TMP10]], align 4
80; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
81; TAILFOLD:       pred.store.continue2:
82; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
83; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
84; TAILFOLD-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
85; TAILFOLD-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
86; TAILFOLD:       middle.block:
87; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
88; TAILFOLD:       scalar.ph:
89; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
90; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
91; TAILFOLD:       for.cond:
92; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
93; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
94; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
95; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
96; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
97; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
98; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
99; TAILFOLD:       if.end:
100; TAILFOLD-NEXT:    ret void
101;
102entry:
103  br label %for.cond
104
; for.cond is the whole loop: header, body and latch in one block.
105for.cond:
106  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
107  %iprom = sext i32 %i to i64
108  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
109  store i16 0, i16* %b, align 4
110  %inc = add nsw i32 %i, 1
; The exit test compares the pre-increment %i, not %inc.
111  %cmp = icmp slt i32 %i, %n
112  br i1 %cmp, label %for.cond, label %if.end
113
114if.end:
115  ret void
116}
117
; @early_exit: the exit test (i < n, signed) lives in the header block
; for.cond, before the store in for.body; the latch for.body branches back
; unconditionally, so the only exit leaves from the header.
; Default-prefix assertions: vectorized at width 2; n.vec subtracts 2 instead
; of 0 when the remainder is zero (the select), and middle.block branches
; unconditionally to scalar.ph.
; Tail-folding assertions: the input loop is matched unchanged, with no
; vector.body.
118define void @early_exit(i16* %p, i32 %n) {
119; CHECK-LABEL: @early_exit(
120; CHECK-NEXT:  entry:
121; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
122; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
123; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
124; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
125; CHECK:       vector.ph:
126; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
127; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
128; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
129; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
130; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
131; CHECK:       vector.body:
132; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
133; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
134; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
135; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
136; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
137; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
138; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
139; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
140; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
141; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
142; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
143; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
144; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
145; CHECK:       middle.block:
146; CHECK-NEXT:    br label [[SCALAR_PH]]
147; CHECK:       scalar.ph:
148; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
149; CHECK-NEXT:    br label [[FOR_COND:%.*]]
150; CHECK:       for.cond:
151; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
152; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
153; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
154; CHECK:       for.body:
155; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
156; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
157; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
158; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
159; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
160; CHECK:       if.end:
161; CHECK-NEXT:    ret void
162;
163; TAILFOLD-LABEL: @early_exit(
164; TAILFOLD-NEXT:  entry:
165; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
166; TAILFOLD:       for.cond:
167; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
168; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
169; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
170; TAILFOLD:       for.body:
171; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
172; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
173; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
174; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
175; TAILFOLD-NEXT:    br label [[FOR_COND]]
176; TAILFOLD:       if.end:
177; TAILFOLD-NEXT:    ret void
178;
179entry:
180  br label %for.cond
181
; Header: tests the bound before any work is done for iteration %i.
182for.cond:
183  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
184  %cmp = icmp slt i32 %i, %n
185  br i1 %cmp, label %for.body, label %if.end
186
; Latch: performs the store and always branches back to the header.
187for.body:
188  %iprom = sext i32 %i to i64
189  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
190  store i16 0, i16* %b, align 4
191  %inc = add nsw i32 %i, 1
192  br label %for.cond
193
194if.end:
195  ret void
196}
197
198; Same loop as @early_exit, but with the optsize attribute to prevent the use
199; of a scalar epilogue. The loop cannot be vectorized in either configuration.
; @optsize: header-tested store loop in a function carrying the optsize
; attribute. Both sets of assertions match the input loop unchanged (blocks
; entry, for.cond, for.body, if.end only), with no vector.body.
200define void @optsize(i16* %p, i32 %n) optsize {
201; CHECK-LABEL: @optsize(
202; CHECK-NEXT:  entry:
203; CHECK-NEXT:    br label [[FOR_COND:%.*]]
204; CHECK:       for.cond:
205; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
206; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
207; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
208; CHECK:       for.body:
209; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
210; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
211; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
212; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
213; CHECK-NEXT:    br label [[FOR_COND]]
214; CHECK:       if.end:
215; CHECK-NEXT:    ret void
216;
217; TAILFOLD-LABEL: @optsize(
218; TAILFOLD-NEXT:  entry:
219; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
220; TAILFOLD:       for.cond:
221; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
222; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
223; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
224; TAILFOLD:       for.body:
225; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
226; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
227; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
228; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
229; TAILFOLD-NEXT:    br label [[FOR_COND]]
230; TAILFOLD:       if.end:
231; TAILFOLD-NEXT:    ret void
232;
233entry:
234  br label %for.cond
235
; Header: the only exit test, evaluated before the store.
236for.cond:
237  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
238  %cmp = icmp slt i32 %i, %n
239  br i1 %cmp, label %for.body, label %if.end
240
241for.body:
242  %iprom = sext i32 %i to i64
243  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
244  store i16 0, i16* %b, align 4
245  %inc = add nsw i32 %i, 1
246  br label %for.cond
247
248if.end:
249  ret void
250}
251
252
253; multiple exit - no values inside the loop used outside
; @multiple_unique_exit: two exiting blocks (for.cond on i < n, for.body on
; i < 2096) that both branch to the single exit block if.end; the function
; returns void, so nothing computed in the loop is used after it.
; Default-prefix assertions: vectorized at width 2 with the trip count formed
; as umin(smax(n, 0), 2096) + 1 and an unconditional branch from middle.block
; to scalar.ph.
; Tail-folding assertions: the input loop is matched unchanged.
254define void @multiple_unique_exit(i16* %p, i32 %n) {
255; CHECK-LABEL: @multiple_unique_exit(
256; CHECK-NEXT:  entry:
257; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
258; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
259; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
260; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
261; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
262; CHECK:       vector.ph:
263; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
264; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
265; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
266; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
267; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
268; CHECK:       vector.body:
269; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
270; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
271; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
272; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
273; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
274; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
275; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
276; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
277; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
278; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
279; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
280; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
281; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
282; CHECK:       middle.block:
283; CHECK-NEXT:    br label [[SCALAR_PH]]
284; CHECK:       scalar.ph:
285; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
286; CHECK-NEXT:    br label [[FOR_COND:%.*]]
287; CHECK:       for.cond:
288; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
289; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
290; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
291; CHECK:       for.body:
292; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
293; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
294; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
295; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
296; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
297; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]]
298; CHECK:       if.end:
299; CHECK-NEXT:    ret void
300;
301; TAILFOLD-LABEL: @multiple_unique_exit(
302; TAILFOLD-NEXT:  entry:
303; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
304; TAILFOLD:       for.cond:
305; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
306; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
307; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
308; TAILFOLD:       for.body:
309; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
310; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
311; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
312; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
313; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
314; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
315; TAILFOLD:       if.end:
316; TAILFOLD-NEXT:    ret void
317;
318entry:
319  br label %for.cond
320
; First exiting block: leaves the loop when i >= n.
321for.cond:
322  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
323  %cmp = icmp slt i32 %i, %n
324  br i1 %cmp, label %for.body, label %if.end
325
; Second exiting block (also the latch): leaves the loop when i >= 2096.
326for.body:
327  %iprom = sext i32 %i to i64
328  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
329  store i16 0, i16* %b, align 4
330  %inc = add nsw i32 %i, 1
331  %cmp2 = icmp slt i32 %i, 2096
332  br i1 %cmp2, label %for.cond, label %if.end
333
334if.end:
335  ret void
336}
337
338; multiple exit - with an lcssa phi
; @multiple_unique_exit2: same two-exit loop as in @multiple_unique_exit, but
; the function returns %i, so the induction variable is live out of the loop
; through the shared exit block if.end.
; Both sets of assertions expect a phi in if.end that receives the scalar
; loop's %i from both for.body and for.cond. The default-prefix assertions
; additionally expect a width-2 vector.body; the tail-folding assertions
; expect no vector.body.
339define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
340; CHECK-LABEL: @multiple_unique_exit2(
341; CHECK-NEXT:  entry:
342; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
343; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
344; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
345; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
346; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
347; CHECK:       vector.ph:
348; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
349; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
350; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
351; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
352; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
353; CHECK:       vector.body:
354; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
355; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
356; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
357; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
358; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
359; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
360; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
361; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
362; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
363; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
364; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
365; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
366; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
367; CHECK:       middle.block:
368; CHECK-NEXT:    br label [[SCALAR_PH]]
369; CHECK:       scalar.ph:
370; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
371; CHECK-NEXT:    br label [[FOR_COND:%.*]]
372; CHECK:       for.cond:
373; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
374; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
375; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
376; CHECK:       for.body:
377; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
378; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
379; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
380; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
381; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
382; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
383; CHECK:       if.end:
384; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
385; CHECK-NEXT:    ret i32 [[I_LCSSA]]
386;
387; TAILFOLD-LABEL: @multiple_unique_exit2(
388; TAILFOLD-NEXT:  entry:
389; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
390; TAILFOLD:       for.cond:
391; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
392; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
393; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
394; TAILFOLD:       for.body:
395; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
396; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
397; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
398; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
399; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
400; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
401; TAILFOLD:       if.end:
402; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
403; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
404;
405entry:
406  br label %for.cond
407
408for.cond:
409  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
410  %cmp = icmp slt i32 %i, %n
411  br i1 %cmp, label %for.body, label %if.end
412
413for.body:
414  %iprom = sext i32 %i to i64
415  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
416  store i16 0, i16* %b, align 4
417  %inc = add nsw i32 %i, 1
418  %cmp2 = icmp slt i32 %i, 2096
419  br i1 %cmp2, label %for.cond, label %if.end
420
; %i is used directly here, outside the loop, without an explicit phi in the
; input; the expected output above contains one.
421if.end:
422  ret i32 %i
423}
424
425; multiple exits w/ a non-LCSSA phi in the shared exit block
; @multiple_unique_exit3: same two-exit loop, but if.end holds a phi over
; constants that yields 0 when the loop is left from for.cond and 1 when it is
; left from for.body, so the return value records which exit was taken.
; Both sets of assertions expect that phi to be kept with the same incoming
; constants. The default-prefix assertions additionally expect a width-2
; vector.body followed by an unconditional branch to scalar.ph; the
; tail-folding assertions expect no vector.body.
426define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
427; CHECK-LABEL: @multiple_unique_exit3(
428; CHECK-NEXT:  entry:
429; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
430; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
431; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
432; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
433; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
434; CHECK:       vector.ph:
435; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
436; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
437; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
438; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
439; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
440; CHECK:       vector.body:
441; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
442; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
443; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
444; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
445; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
446; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
447; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
448; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
449; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
450; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
451; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
452; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
453; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
454; CHECK:       middle.block:
455; CHECK-NEXT:    br label [[SCALAR_PH]]
456; CHECK:       scalar.ph:
457; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
458; CHECK-NEXT:    br label [[FOR_COND:%.*]]
459; CHECK:       for.cond:
460; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
461; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
462; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
463; CHECK:       for.body:
464; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
465; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
466; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
467; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
468; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
469; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
470; CHECK:       if.end:
471; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
472; CHECK-NEXT:    ret i32 [[EXIT]]
473;
474; TAILFOLD-LABEL: @multiple_unique_exit3(
475; TAILFOLD-NEXT:  entry:
476; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
477; TAILFOLD:       for.cond:
478; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
479; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
480; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
481; TAILFOLD:       for.body:
482; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
483; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
484; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
485; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
486; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
487; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
488; TAILFOLD:       if.end:
489; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
490; TAILFOLD-NEXT:    ret i32 [[EXIT]]
491;
492entry:
493  br label %for.cond
494
495for.cond:
496  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
497  %cmp = icmp slt i32 %i, %n
498  br i1 %cmp, label %for.body, label %if.end
499
500for.body:
501  %iprom = sext i32 %i to i64
502  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
503  store i16 0, i16* %b, align 4
504  %inc = add nsw i32 %i, 1
505  %cmp2 = icmp slt i32 %i, 2096
506  br i1 %cmp2, label %for.cond, label %if.end
507
; The phi's incoming values are constants keyed on the exiting block, not
; values defined inside the loop.
508if.end:
509  %exit = phi i32 [0, %for.cond], [1, %for.body]
510  ret i32 %exit
511}
512
513; multiple exits w/distinct target blocks
; @multiple_exit_blocks: the two exiting blocks branch to different exit
; blocks: for.cond leaves to if.end (returns 0) and for.body leaves to if.end2
; (returns 1).
; Both sets of assertions match the input loop unchanged, with no vector.body.
514define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
515; CHECK-LABEL: @multiple_exit_blocks(
516; CHECK-NEXT:  entry:
517; CHECK-NEXT:    br label [[FOR_COND:%.*]]
518; CHECK:       for.cond:
519; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
520; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
521; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
522; CHECK:       for.body:
523; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
524; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
525; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
526; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
527; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
528; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
529; CHECK:       if.end:
530; CHECK-NEXT:    ret i32 0
531; CHECK:       if.end2:
532; CHECK-NEXT:    ret i32 1
533;
534; TAILFOLD-LABEL: @multiple_exit_blocks(
535; TAILFOLD-NEXT:  entry:
536; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
537; TAILFOLD:       for.cond:
538; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
539; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
540; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
541; TAILFOLD:       for.body:
542; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
543; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
544; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
545; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
546; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
547; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
548; TAILFOLD:       if.end:
549; TAILFOLD-NEXT:    ret i32 0
550; TAILFOLD:       if.end2:
551; TAILFOLD-NEXT:    ret i32 1
552;
553entry:
554  br label %for.cond
555
556for.cond:
557  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
558  %cmp = icmp slt i32 %i, %n
559  br i1 %cmp, label %for.body, label %if.end
560
561for.body:
562  %iprom = sext i32 %i to i64
563  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
564  store i16 0, i16* %b, align 4
565  %inc = add nsw i32 %i, 1
566  %cmp2 = icmp slt i32 %i, 2096
567  br i1 %cmp2, label %for.cond, label %if.end2
568
; Exit taken from for.cond.
569if.end:
570  ret i32 0
571
; Exit taken from for.body.
572if.end2:
573  ret i32 1
574}
575
576; LCSSA, common value each exit
; @multiple_exit_blocks2: two distinct exit blocks, if.end and if.end2, each
; returning the same loop value %i.
; Both sets of assertions expect the loop to stay scalar (no vector.body) and
; each exit block to gain a single-input phi of %i that feeds its return.
577define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
578; CHECK-LABEL: @multiple_exit_blocks2(
579; CHECK-NEXT:  entry:
580; CHECK-NEXT:    br label [[FOR_COND:%.*]]
581; CHECK:       for.cond:
582; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
583; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
584; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
585; CHECK:       for.body:
586; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
587; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
588; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
589; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
590; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
591; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
592; CHECK:       if.end:
593; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
594; CHECK-NEXT:    ret i32 [[I_LCSSA]]
595; CHECK:       if.end2:
596; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
597; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
598;
599; TAILFOLD-LABEL: @multiple_exit_blocks2(
600; TAILFOLD-NEXT:  entry:
601; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
602; TAILFOLD:       for.cond:
603; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
604; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
605; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
606; TAILFOLD:       for.body:
607; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
608; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
609; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
610; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
611; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
612; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
613; TAILFOLD:       if.end:
614; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
615; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
616; TAILFOLD:       if.end2:
617; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
618; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
619;
620entry:
621  br label %for.cond
622
623for.cond:
624  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
625  %cmp = icmp slt i32 %i, %n
626  br i1 %cmp, label %for.body, label %if.end
627
628for.body:
629  %iprom = sext i32 %i to i64
630  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
631  store i16 0, i16* %b, align 4
632  %inc = add nsw i32 %i, 1
633  %cmp2 = icmp slt i32 %i, 2096
634  br i1 %cmp2, label %for.cond, label %if.end2
635
; Both exit blocks return the pre-increment %i.
636if.end:
637  ret i32 %i
638
639if.end2:
640  ret i32 %i
641}
642
643; LCSSA, distinct value each exit
; The loop has two exiting blocks: for.cond leaves to if.end, which returns
; %i, and for.body leaves to if.end2, which returns %inc.  The expected output
; for both configurations keeps the scalar loop (no vector body) and only
; shows one LCSSA phi per exit block for the two live-out values.
644define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
645; CHECK-LABEL: @multiple_exit_blocks3(
646; CHECK-NEXT:  entry:
647; CHECK-NEXT:    br label [[FOR_COND:%.*]]
648; CHECK:       for.cond:
649; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
650; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
651; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
652; CHECK:       for.body:
653; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
654; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
655; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
656; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
657; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
658; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
659; CHECK:       if.end:
660; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
661; CHECK-NEXT:    ret i32 [[I_LCSSA]]
662; CHECK:       if.end2:
663; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
664; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
665;
666; TAILFOLD-LABEL: @multiple_exit_blocks3(
667; TAILFOLD-NEXT:  entry:
668; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
669; TAILFOLD:       for.cond:
670; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
671; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
672; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
673; TAILFOLD:       for.body:
674; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
675; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
676; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
677; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
678; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
679; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
680; TAILFOLD:       if.end:
681; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
682; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
683; TAILFOLD:       if.end2:
684; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
685; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
686;
687entry:
688  br label %for.cond
689
690for.cond:
691  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
692  %cmp = icmp slt i32 %i, %n
; First exit: leaves to if.end once %i is no longer (signed) less than %n.
693  br i1 %cmp, label %for.body, label %if.end
694
695for.body:
696  %iprom = sext i32 %i to i64
697  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
698  store i16 0, i16* %b, align 4
699  %inc = add nsw i32 %i, 1
; Second exit: tests the pre-increment %i against the constant 2096 and
; leaves to if.end2 when the comparison is false.
700  %cmp2 = icmp slt i32 %i, 2096
701  br i1 %cmp2, label %for.cond, label %if.end2
702
703if.end:
704  ret i32 %i
705
706if.end2:
; Returns the incremented value, whereas if.end returns %i.
707  ret i32 %inc
708}
709
710; unique exit case but with a switch as two edges between the same pair of
711; blocks is an often missed edge case
; The single-block loop for.cond ends in a switch whose two cases (2096 and
; 2097) both target if.end, so if.end has two incoming edges from for.cond.
; The expected output for both configurations keeps the scalar loop and shows
; an LCSSA phi in if.end that lists for.cond twice.  %n is not used by the body.
712define i32 @multiple_exit_switch(i16* %p, i32 %n) {
713; CHECK-LABEL: @multiple_exit_switch(
714; CHECK-NEXT:  entry:
715; CHECK-NEXT:    br label [[FOR_COND:%.*]]
716; CHECK:       for.cond:
717; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
718; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
719; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
720; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
721; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
722; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
723; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
724; CHECK-NEXT:    i32 2097, label [[IF_END]]
725; CHECK-NEXT:    ]
726; CHECK:       if.end:
727; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
728; CHECK-NEXT:    ret i32 [[I_LCSSA]]
729;
730; TAILFOLD-LABEL: @multiple_exit_switch(
731; TAILFOLD-NEXT:  entry:
732; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
733; TAILFOLD:       for.cond:
734; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
735; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
736; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
737; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
738; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
739; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
740; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
741; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
742; TAILFOLD-NEXT:    ]
743; TAILFOLD:       if.end:
744; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
745; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
746;
747entry:
748  br label %for.cond
749
750for.cond:
751  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
752  %iprom = sext i32 %i to i64
753  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
754  store i16 0, i16* %b, align 4
755  %inc = add nsw i32 %i, 1
; The default destination is the back edge; both explicit cases leave the
; loop through the same successor block.
756  switch i32 %i, label %for.cond [
757  i32 2096, label %if.end
758  i32 2097, label %if.end
759  ]
760
761if.end:
762  ret i32 %i
763}
764
765; multiple exit case but with a switch as multiple exiting edges from
766; a single block is a commonly missed edge case
; Here the two switch cases leave the single-block loop to different exit
; blocks: case 2096 goes to if.end (returns 0) and case 2097 goes to if.end2
; (returns 1).  The expected output for both configurations is the scalar loop
; with no vector body.  %n is not used by the body.
767define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
768; CHECK-LABEL: @multiple_exit_switch2(
769; CHECK-NEXT:  entry:
770; CHECK-NEXT:    br label [[FOR_COND:%.*]]
771; CHECK:       for.cond:
772; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
773; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
774; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
775; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
776; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
777; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
778; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
779; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
780; CHECK-NEXT:    ]
781; CHECK:       if.end:
782; CHECK-NEXT:    ret i32 0
783; CHECK:       if.end2:
784; CHECK-NEXT:    ret i32 1
785;
786; TAILFOLD-LABEL: @multiple_exit_switch2(
787; TAILFOLD-NEXT:  entry:
788; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
789; TAILFOLD:       for.cond:
790; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
791; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
792; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
793; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
794; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
795; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
796; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
797; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
798; TAILFOLD-NEXT:    ]
799; TAILFOLD:       if.end:
800; TAILFOLD-NEXT:    ret i32 0
801; TAILFOLD:       if.end2:
802; TAILFOLD-NEXT:    ret i32 1
803;
804entry:
805  br label %for.cond
806
807for.cond:
808  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
809  %iprom = sext i32 %i to i64
810  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
811  store i16 0, i16* %b, align 4
812  %inc = add nsw i32 %i, 1
; The default destination is the back edge; each explicit case leaves the
; loop through its own exit block.
813  switch i32 %i, label %for.cond [
814  i32 2096, label %if.end
815  i32 2097, label %if.end2
816  ]
817
818if.end:
819  ret i32 0
820
821if.end2:
822  ret i32 1
823}
824
; Loop in which both for.body and for.second branch to a dedicated
; for.body.backedge block, which is the only block that branches back to the
; header for.body.  for.second is also the only exiting block (to for.end).
; The expected output for both configurations is the same scalar control flow
; with no vector body.
825define i32 @multiple_latch1(i16* %p) {
826; CHECK-LABEL: @multiple_latch1(
827; CHECK-NEXT:  entry:
828; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
829; CHECK:       for.body:
830; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
831; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
832; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
833; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
834; CHECK:       for.second:
835; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
836; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
837; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
838; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
839; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
840; CHECK:       for.body.backedge:
841; CHECK-NEXT:    br label [[FOR_BODY]]
842; CHECK:       for.end:
843; CHECK-NEXT:    ret i32 0
844;
845; TAILFOLD-LABEL: @multiple_latch1(
846; TAILFOLD-NEXT:  entry:
847; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
848; TAILFOLD:       for.body:
849; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
850; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
851; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
852; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
853; TAILFOLD:       for.second:
854; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
855; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
856; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
857; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
858; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
859; TAILFOLD:       for.body.backedge:
860; TAILFOLD-NEXT:    br label [[FOR_BODY]]
861; TAILFOLD:       for.end:
862; TAILFOLD-NEXT:    ret i32 0
863;
864entry:
865  br label %for.body
866
867for.body:
868  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
869  %inc = add nsw i32 %i.02, 1
870  %cmp = icmp slt i32 %inc, 16
; While %inc is below 16 the iteration skips for.second and goes straight to
; the shared backedge block.
871  br i1 %cmp, label %for.body.backedge, label %for.second
872
873for.second:
874  %iprom = sext i32 %i.02 to i64
875  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
876  store i16 0, i16* %b, align 4
877  %cmps = icmp sgt i32 %inc, 16
; Continues through the shared backedge block when %inc is greater than 16,
; otherwise leaves the loop.
878  br i1 %cmps, label %for.body.backedge, label %for.end
879
880for.body.backedge:
881  br label %for.body
882
883for.end:
884  ret i32 0
885}
886
887
888; two back branches - loop simplify will convert this to the same form
889; as the previous test before the vectorizer sees it, but show that.
; In the input, the header for.body is targeted directly by two back edges:
; one from for.body itself and one from for.second.  The expected output for
; both configurations contains a for.body.backedge block that both branches go
; through, and no vector body.
890define i32 @multiple_latch2(i16* %p) {
891; CHECK-LABEL: @multiple_latch2(
892; CHECK-NEXT:  entry:
893; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
894; CHECK:       for.body:
895; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
896; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
897; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
898; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
899; CHECK:       for.body.backedge:
900; CHECK-NEXT:    br label [[FOR_BODY]]
901; CHECK:       for.second:
902; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
903; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
904; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
905; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
906; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
907; CHECK:       for.end:
908; CHECK-NEXT:    ret i32 0
909;
910; TAILFOLD-LABEL: @multiple_latch2(
911; TAILFOLD-NEXT:  entry:
912; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
913; TAILFOLD:       for.body:
914; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
915; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
916; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
917; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
918; TAILFOLD:       for.body.backedge:
919; TAILFOLD-NEXT:    br label [[FOR_BODY]]
920; TAILFOLD:       for.second:
921; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
922; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
923; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
924; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
925; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
926; TAILFOLD:       for.end:
927; TAILFOLD-NEXT:    ret i32 0
928;
929entry:
930  br label %for.body
931
932for.body:
; The header phi has one incoming value per back edge (for.body and
; for.second) in addition to the entry value.
933  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
934  %inc = add nsw i32 %i.02, 1
935  %cmp = icmp slt i32 %inc, 16
936  br i1 %cmp, label %for.body, label %for.second
937
938for.second:
939  %iprom = sext i32 %i.02 to i64
940  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
941  store i16 0, i16* %b, align 4
942  %cmps = icmp sgt i32 %inc, 16
943  br i1 %cmps, label %for.body, label %for.end
944
945for.end:
946  ret i32 0
947}
948
949
950; Check interaction between block predication and early exits.  We need the
951; condition on the early exit to remain dead (i.e. not be used when forming
952; the predicate mask).
; The loop exits only from loop.header (when %iv equals 200); loop.latch
; branches back unconditionally.  The store in 'then' is guarded by the fcmp
; in loop.body.  With the default configuration the expected output has a
; width-2 vector body whose stores are predicated per lane (pred.store.if /
; pred.store.continue), with the mask built from the inverted fcmp only, and
; the middle block always falls through to the scalar loop.  With the
; tail-folding configuration the expected output is the unmodified scalar loop.
953define void @scalar_predication(float* %addr) {
954; CHECK-LABEL: @scalar_predication(
955; CHECK-NEXT:  entry:
956; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
957; CHECK:       vector.ph:
958; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
959; CHECK:       vector.body:
960; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
961; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
962; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
963; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
964; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
965; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
966; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
967; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
968; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
969; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
970; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
971; CHECK:       pred.store.if:
972; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP1]], align 4
973; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
974; CHECK:       pred.store.continue:
975; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
976; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
977; CHECK:       pred.store.if1:
978; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
979; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]]
980; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP9]], align 4
981; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
982; CHECK:       pred.store.continue2:
983; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
984; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
985; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
986; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
987; CHECK:       middle.block:
988; CHECK-NEXT:    br label [[SCALAR_PH]]
989; CHECK:       scalar.ph:
990; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
991; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
992; CHECK:       loop.header:
993; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
994; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
995; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
996; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
997; CHECK:       loop.body:
998; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[GEP]], align 4
999; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
1000; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1001; CHECK:       then:
1002; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1003; CHECK-NEXT:    br label [[LOOP_LATCH]]
1004; CHECK:       loop.latch:
1005; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1006; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
1007; CHECK:       exit:
1008; CHECK-NEXT:    ret void
1009;
1010; TAILFOLD-LABEL: @scalar_predication(
1011; TAILFOLD-NEXT:  entry:
1012; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1013; TAILFOLD:       loop.header:
1014; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1015; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
1016; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1017; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1018; TAILFOLD:       loop.body:
1019; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
1020; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
1021; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1022; TAILFOLD:       then:
1023; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1024; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
1025; TAILFOLD:       loop.latch:
1026; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1027; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1028; TAILFOLD:       exit:
1029; TAILFOLD-NEXT:    ret void
1030;
1031entry:
1032  br label %loop.header
1033
1034loop.header:
1035  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1036  %gep = getelementptr float, float* %addr, i64 %iv
; Early exit: this header test is the only way out of the loop.
1037  %exitcond.not = icmp eq i64 %iv, 200
1038  br i1 %exitcond.not, label %exit, label %loop.body
1039
1040loop.body:
1041  %0 = load float, float* %gep, align 4
1042  %pred = fcmp oeq float %0, 0.0
; The store in 'then' runs only when the ordered-equal comparison is false.
1043  br i1 %pred, label %loop.latch, label %then
1044
1045then:
1046  store float 10.0, float* %gep, align 4
1047  br label %loop.latch
1048
1049loop.latch:
1050  %iv.next = add nuw nsw i64 %iv, 1
1051  br label %loop.header
1052
1053exit:
1054  ret void
1055}
1056
; Multiple-exit loop carrying an integer add reduction in %accum.  The header
; exits when %iv equals 200 and contributes the constant 0 to the phi in
; 'exit'; the latch exits when %iv equals 400 and contributes %accum.next.
; With the default configuration the expected output has a width-2 vector body
; with a <2 x i32> accumulator that is folded with llvm.vector.reduce.add in
; the middle block before falling through to the scalar loop.  With the
; tail-folding configuration the expected output is the unmodified scalar loop.
1057define i32 @me_reduction(i32* %addr) {
1058; CHECK-LABEL: @me_reduction(
1059; CHECK-NEXT:  entry:
1060; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1061; CHECK:       vector.ph:
1062; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1063; CHECK:       vector.body:
1064; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1065; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1066; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
1067; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1068; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1069; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
1070; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
1071; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
1072; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
1073; CHECK-NEXT:    [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
1074; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
1075; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1076; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1077; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
1078; CHECK:       middle.block:
1079; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]])
1080; CHECK-NEXT:    br label [[SCALAR_PH]]
1081; CHECK:       scalar.ph:
1082; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1083; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1084; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1085; CHECK:       loop.header:
1086; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1087; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1088; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
1089; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1090; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1091; CHECK:       loop.latch:
1092; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
1093; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
1094; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1095; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1096; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
1097; CHECK:       exit:
1098; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1099; CHECK-NEXT:    ret i32 [[LCSSA]]
1100;
1101; TAILFOLD-LABEL: @me_reduction(
1102; TAILFOLD-NEXT:  entry:
1103; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1104; TAILFOLD:       loop.header:
1105; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1106; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1107; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1108; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1109; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1110; TAILFOLD:       loop.latch:
1111; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1112; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1113; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1114; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1115; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1116; TAILFOLD:       exit:
1117; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1118; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1119;
1120entry:
1121  br label %loop.header
1122
1123loop.header:
1124  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1125  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1126  %gep = getelementptr i32, i32* %addr, i64 %iv
; First exit: taken when %iv equals 200, before this iteration's load.
1127  %exitcond.not = icmp eq i64 %iv, 200
1128  br i1 %exitcond.not, label %exit, label %loop.latch
1129
1130loop.latch:
1131  %0 = load i32, i32* %gep, align 4
1132  %accum.next = add i32 %accum, %0
1133  %iv.next = add nuw nsw i64 %iv, 1
; Second exit: tests the pre-increment %iv against 400.
1134  %exitcond2.not = icmp eq i64 %iv, 400
1135  br i1 %exitcond2.not, label %exit, label %loop.header
1136
1137exit:
; The returned value depends on which exit was taken.
1138  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
1139  ret i32 %lcssa
1140}
1141
1142; TODO: The current definition of reduction is too strict, we can vectorize
1143; this.  There's an analogous single exit case where we extract the N-1
1144; value of the reduction that we can also handle.  If we fix the latter, the
1145; multiple exit case probably falls out.
; The only exit is from loop.header (when %iv equals 200) and the value
; returned is the header phi %accum rather than %accum.next; the latch
; branches back unconditionally.  The expected output for both configurations
; is the scalar loop with an LCSSA phi for %accum and no vector body.
1146define i32 @me_reduction2(i32* %addr) {
1147; CHECK-LABEL: @me_reduction2(
1148; CHECK-NEXT:  entry:
1149; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1150; CHECK:       loop.header:
1151; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1152; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1153; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1154; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1155; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1156; CHECK:       loop.latch:
1157; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1158; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1159; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1160; CHECK-NEXT:    br label [[LOOP_HEADER]]
1161; CHECK:       exit:
1162; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1163; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1164;
1165; TAILFOLD-LABEL: @me_reduction2(
1166; TAILFOLD-NEXT:  entry:
1167; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1168; TAILFOLD:       loop.header:
1169; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1170; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1171; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1172; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1173; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1174; TAILFOLD:       loop.latch:
1175; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1176; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1177; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1178; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1179; TAILFOLD:       exit:
1180; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1181; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1182;
1183entry:
1184  br label %loop.header
1185
1186loop.header:
1187  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1188  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1189  %gep = getelementptr i32, i32* %addr, i64 %iv
1190  %exitcond.not = icmp eq i64 %iv, 200
1191  br i1 %exitcond.not, label %exit, label %loop.latch
1192
1193loop.latch:
1194  %0 = load i32, i32* %gep, align 4
1195  %accum.next = add i32 %accum, %0
1196  %iv.next = add nuw nsw i64 %iv, 1
1197  br label %loop.header
1198
1199exit:
; Live-out is the header phi, i.e. the sum before the current iteration's add.
1200  ret i32 %accum
1201}
1202
1203