xref: /llvm-project/llvm/test/Transforms/LoopVectorize/loop-form.ll (revision b46c085d2b6d15873fb53718f0a70b3848e19e4a)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
; Two opt invocations are verified. Both run -loop-vectorize with the vector
; width forced to 2; the second additionally passes
; -prefer-predicate-over-epilogue=predicate-dont-vectorize and is matched
; against its own FileCheck prefix.
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Single-block loop: for.cond is both header and latch. Every iteration stores
; i16 0 to %p[%i]; the only exit test (%i slt %n) is on the back-edge branch,
; so the store executes before the condition is evaluated.
; Expected output: the default invocation gets a <2 x i16> vector body plus a
; scalar remainder loop; the predicate-dont-vectorize invocation gets per-lane
; conditional stores (pred.store.if / pred.store.if1) and a middle.block that
; always branches to if.end (br i1 true).
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
11; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
12; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
13; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK:       vector.ph:
15; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
16; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
17; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
18; CHECK:       vector.body:
19; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
21; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
22; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
24; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
25; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
26; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
27; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
28; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
29; CHECK:       middle.block:
30; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
31; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
32; CHECK:       scalar.ph:
33; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
34; CHECK-NEXT:    br label [[FOR_COND:%.*]]
35; CHECK:       for.cond:
36; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
37; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
38; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
39; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
40; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
41; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
42; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
43; CHECK:       if.end:
44; CHECK-NEXT:    ret void
45;
46; TAILFOLD-LABEL: @bottom_tested(
47; TAILFOLD-NEXT:  entry:
48; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
50; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
51; TAILFOLD:       vector.ph:
52; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
53; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
54; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
55; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
56; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
57; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
58; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
59; TAILFOLD:       vector.body:
60; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
61; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
62; TAILFOLD-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
63; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 1
64; TAILFOLD-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
65; TAILFOLD-NEXT:    [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
66; TAILFOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
67; TAILFOLD-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
68; TAILFOLD:       pred.store.if:
69; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
70; TAILFOLD-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
71; TAILFOLD-NEXT:    store i16 0, i16* [[TMP7]], align 4
72; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
73; TAILFOLD:       pred.store.continue:
74; TAILFOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
75; TAILFOLD-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
76; TAILFOLD:       pred.store.if1:
77; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
78; TAILFOLD-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]]
79; TAILFOLD-NEXT:    store i16 0, i16* [[TMP10]], align 4
80; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
81; TAILFOLD:       pred.store.continue2:
82; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
83; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
84; TAILFOLD-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
85; TAILFOLD-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
86; TAILFOLD:       middle.block:
87; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
88; TAILFOLD:       scalar.ph:
89; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
90; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
91; TAILFOLD:       for.cond:
92; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
93; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
94; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
95; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
96; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
97; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
98; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
99; TAILFOLD:       if.end:
100; TAILFOLD-NEXT:    ret void
101;
102entry:
103  br label %for.cond
104
105for.cond:
106  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
107  %iprom = sext i32 %i to i64
108  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
109  store i16 0, i16* %b, align 4
110  %inc = add nsw i32 %i, 1
111  %cmp = icmp slt i32 %i, %n
112  br i1 %cmp, label %for.cond, label %if.end
113
114if.end:
115  ret void
116}
117
; Top-tested loop: the header for.cond exits to if.end when %i slt %n is false,
; and the latch for.body (store i16 0 to %p[%i], increment) branches back
; unconditionally.
; Expected output: the default invocation vectorizes; when the trip count is a
; multiple of 2 the select in vector.ph subtracts 2 instead of 0, so the vector
; trip count is always smaller than the full trip count. The
; predicate-dont-vectorize invocation leaves the loop as written.
118define void @early_exit(i16* %p, i32 %n) {
119; CHECK-LABEL: @early_exit(
120; CHECK-NEXT:  entry:
121; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
122; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
123; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
124; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
125; CHECK:       vector.ph:
126; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
127; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
128; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
129; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
130; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
131; CHECK:       vector.body:
132; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
133; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
134; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
135; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
136; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
137; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
138; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
139; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
140; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
141; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
142; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
143; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
144; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
145; CHECK:       middle.block:
146; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
147; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
148; CHECK:       scalar.ph:
149; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
150; CHECK-NEXT:    br label [[FOR_COND:%.*]]
151; CHECK:       for.cond:
152; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
153; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
154; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
155; CHECK:       for.body:
156; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
157; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
158; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
159; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
160; CHECK-NEXT:    br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]]
161; CHECK:       if.end:
162; CHECK-NEXT:    ret void
163;
164; TAILFOLD-LABEL: @early_exit(
165; TAILFOLD-NEXT:  entry:
166; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
167; TAILFOLD:       for.cond:
168; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
169; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
170; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
171; TAILFOLD:       for.body:
172; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
173; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
174; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
175; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
176; TAILFOLD-NEXT:    br label [[FOR_COND]]
177; TAILFOLD:       if.end:
178; TAILFOLD-NEXT:    ret void
179;
180entry:
181  br label %for.cond
182
183for.cond:
184  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
185  %cmp = icmp slt i32 %i, %n
186  br i1 %cmp, label %for.body, label %if.end
187
188for.body:
189  %iprom = sext i32 %i to i64
190  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
191  store i16 0, i16* %b, align 4
192  %inc = add nsw i32 %i, 1
193  br label %for.cond
194
195if.end:
196  ret void
197}
198
199; Same as early_exit, but with optsize to prevent the use of a scalar
200; epilogue. The loop cannot be vectorized under either invocation.
; The loop body is the same IR as @early_exit; only the optsize function
; attribute differs. Both invocations are expected to leave the loop scalar:
; the assertions match the input blocks and contain no vector.body.
201define void @optsize(i16* %p, i32 %n) optsize {
202; CHECK-LABEL: @optsize(
203; CHECK-NEXT:  entry:
204; CHECK-NEXT:    br label [[FOR_COND:%.*]]
205; CHECK:       for.cond:
206; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
207; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
208; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
209; CHECK:       for.body:
210; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
211; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
212; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
213; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
214; CHECK-NEXT:    br label [[FOR_COND]]
215; CHECK:       if.end:
216; CHECK-NEXT:    ret void
217;
218; TAILFOLD-LABEL: @optsize(
219; TAILFOLD-NEXT:  entry:
220; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
221; TAILFOLD:       for.cond:
222; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
223; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
224; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
225; TAILFOLD:       for.body:
226; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
227; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
228; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
229; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
230; TAILFOLD-NEXT:    br label [[FOR_COND]]
231; TAILFOLD:       if.end:
232; TAILFOLD-NEXT:    ret void
233;
234entry:
235  br label %for.cond
236
237for.cond:
238  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
239  %cmp = icmp slt i32 %i, %n
240  br i1 %cmp, label %for.body, label %if.end
241
242for.body:
243  %iprom = sext i32 %i to i64
244  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
245  store i16 0, i16* %b, align 4
246  %inc = add nsw i32 %i, 1
247  br label %for.cond
248
249if.end:
250  ret void
251}
252
253
254; multiple exit - no values inside the loop used outside
; Two exiting branches share the exit block if.end: the header for.cond leaves
; when %i slt %n is false, and the latch for.body leaves when %i slt 2096 is
; false. The function returns void, so nothing computed in the loop is used
; after it.
; Expected output: the default invocation vectorizes with a trip count of
; umin(smax(%n, 0), 2096) + 1; the predicate-dont-vectorize invocation leaves
; the loop as written.
255define void @multiple_unique_exit(i16* %p, i32 %n) {
256; CHECK-LABEL: @multiple_unique_exit(
257; CHECK-NEXT:  entry:
258; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
259; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
260; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
261; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
262; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
263; CHECK:       vector.ph:
264; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
265; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
266; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
267; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
268; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
269; CHECK:       vector.body:
270; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
271; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
272; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
273; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
274; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
275; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
276; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
277; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
278; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
279; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
280; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
281; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
282; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
283; CHECK:       middle.block:
284; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
285; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
286; CHECK:       scalar.ph:
287; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
288; CHECK-NEXT:    br label [[FOR_COND:%.*]]
289; CHECK:       for.cond:
290; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
291; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
292; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
293; CHECK:       for.body:
294; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
295; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
296; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
297; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
298; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
299; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]]
300; CHECK:       if.end:
301; CHECK-NEXT:    ret void
302;
303; TAILFOLD-LABEL: @multiple_unique_exit(
304; TAILFOLD-NEXT:  entry:
305; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
306; TAILFOLD:       for.cond:
307; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
308; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
309; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
310; TAILFOLD:       for.body:
311; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
312; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
313; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
314; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
315; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
316; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
317; TAILFOLD:       if.end:
318; TAILFOLD-NEXT:    ret void
319;
320entry:
321  br label %for.cond
322
323for.cond:
324  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
325  %cmp = icmp slt i32 %i, %n
326  br i1 %cmp, label %for.body, label %if.end
327
328for.body:
329  %iprom = sext i32 %i to i64
330  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
331  store i16 0, i16* %b, align 4
332  %inc = add nsw i32 %i, 1
333  %cmp2 = icmp slt i32 %i, 2096
334  br i1 %cmp2, label %for.cond, label %if.end
335
336if.end:
337  ret void
338}
339
340; multiple exit - with an lcssa phi
; Same two-exit loop as @multiple_unique_exit (header exit on %i slt %n, latch
; exit on %i slt 2096, both to if.end), but the induction variable %i is
; returned from if.end, so it is live out of the loop.
; Expected output: the default invocation vectorizes, and the phi feeding the
; return gains a middle.block incoming value equal to the vector trip count
; minus 1; the predicate-dont-vectorize invocation stays scalar with a
; two-entry phi feeding the return.
341define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
342; CHECK-LABEL: @multiple_unique_exit2(
343; CHECK-NEXT:  entry:
344; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
345; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
346; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
347; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
348; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
349; CHECK:       vector.ph:
350; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
351; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
352; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
353; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
354; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
355; CHECK:       vector.body:
356; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
357; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
358; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
359; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
360; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
361; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
362; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
363; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
364; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
365; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
366; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
367; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
368; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
369; CHECK:       middle.block:
370; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
371; CHECK-NEXT:    [[IND_ESCAPE:%.*]] = sub i32 [[N_VEC]], 1
372; CHECK-NEXT:    [[IND_ESCAPE1:%.*]] = sub i32 [[N_VEC]], 1
373; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
374; CHECK:       scalar.ph:
375; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
376; CHECK-NEXT:    br label [[FOR_COND:%.*]]
377; CHECK:       for.cond:
378; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
379; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
380; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
381; CHECK:       for.body:
382; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
383; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
384; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
385; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
386; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
387; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]]
388; CHECK:       if.end:
389; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ], [ [[IND_ESCAPE1]], [[MIDDLE_BLOCK]] ]
390; CHECK-NEXT:    ret i32 [[I_LCSSA]]
391;
392; TAILFOLD-LABEL: @multiple_unique_exit2(
393; TAILFOLD-NEXT:  entry:
394; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
395; TAILFOLD:       for.cond:
396; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
397; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
398; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
399; TAILFOLD:       for.body:
400; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
401; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
402; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
403; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
404; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
405; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
406; TAILFOLD:       if.end:
407; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
408; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
409;
410entry:
411  br label %for.cond
412
413for.cond:
414  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
415  %cmp = icmp slt i32 %i, %n
416  br i1 %cmp, label %for.body, label %if.end
417
418for.body:
419  %iprom = sext i32 %i to i64
420  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
421  store i16 0, i16* %b, align 4
422  %inc = add nsw i32 %i, 1
423  %cmp2 = icmp slt i32 %i, 2096
424  br i1 %cmp2, label %for.cond, label %if.end
425
426if.end:
427  ret i32 %i
428}
429
430; multiple exit with a non-LCSSA phi
; Two exiting branches (header on %i slt %n, latch on %i slt 2096) share the
; exit block if.end, which holds a phi over constants recording which exit was
; taken: 0 when arriving from for.cond, 1 when arriving from for.body.
; Expected output: the default invocation vectorizes and adds a third incoming
; value, 0 from middle.block, to that phi; the predicate-dont-vectorize
; invocation leaves the function as written.
431define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
432; CHECK-LABEL: @multiple_unique_exit3(
433; CHECK-NEXT:  entry:
434; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
435; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
436; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
437; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
438; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
439; CHECK:       vector.ph:
440; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
441; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
442; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
443; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
444; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
445; CHECK:       vector.body:
446; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
447; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
448; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
449; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
450; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
451; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
452; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
453; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
454; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
455; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
456; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
457; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
458; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
459; CHECK:       middle.block:
460; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
461; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
462; CHECK:       scalar.ph:
463; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
464; CHECK-NEXT:    br label [[FOR_COND:%.*]]
465; CHECK:       for.cond:
466; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
467; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
468; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
469; CHECK:       for.body:
470; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
471; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
472; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
473; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
474; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
475; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP11:!llvm.loop !.*]]
476; CHECK:       if.end:
477; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ], [ 0, [[MIDDLE_BLOCK]] ]
478; CHECK-NEXT:    ret i32 [[EXIT]]
479;
480; TAILFOLD-LABEL: @multiple_unique_exit3(
481; TAILFOLD-NEXT:  entry:
482; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
483; TAILFOLD:       for.cond:
484; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
485; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
486; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
487; TAILFOLD:       for.body:
488; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
489; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
490; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
491; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
492; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
493; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
494; TAILFOLD:       if.end:
495; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
496; TAILFOLD-NEXT:    ret i32 [[EXIT]]
497;
498entry:
499  br label %for.cond
500
501for.cond:
502  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
503  %cmp = icmp slt i32 %i, %n
504  br i1 %cmp, label %for.body, label %if.end
505
506for.body:
507  %iprom = sext i32 %i to i64
508  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
509  store i16 0, i16* %b, align 4
510  %inc = add nsw i32 %i, 1
511  %cmp2 = icmp slt i32 %i, 2096
512  br i1 %cmp2, label %for.cond, label %if.end
513
514if.end:
515  %exit = phi i32 [0, %for.cond], [1, %for.body]
516  ret i32 %exit
517}
518
519; multiple exits w/distinct target blocks
; The two exiting branches target different blocks: for.cond exits to if.end
; (ret i32 0) when %i slt %n is false, and for.body exits to if.end2
; (ret i32 1) when %i slt 2096 is false.
; Expected output: both invocations leave the loop scalar; the assertions match
; the input blocks and contain no vector.body.
520define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
521; CHECK-LABEL: @multiple_exit_blocks(
522; CHECK-NEXT:  entry:
523; CHECK-NEXT:    br label [[FOR_COND:%.*]]
524; CHECK:       for.cond:
525; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
526; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
527; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
528; CHECK:       for.body:
529; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
530; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
531; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
532; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
533; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
534; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
535; CHECK:       if.end:
536; CHECK-NEXT:    ret i32 0
537; CHECK:       if.end2:
538; CHECK-NEXT:    ret i32 1
539;
540; TAILFOLD-LABEL: @multiple_exit_blocks(
541; TAILFOLD-NEXT:  entry:
542; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
543; TAILFOLD:       for.cond:
544; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
545; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
546; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
547; TAILFOLD:       for.body:
548; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
549; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
550; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
551; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
552; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
553; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
554; TAILFOLD:       if.end:
555; TAILFOLD-NEXT:    ret i32 0
556; TAILFOLD:       if.end2:
557; TAILFOLD-NEXT:    ret i32 1
558;
559entry:
560  br label %for.cond
561
562for.cond:
563  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
564  %cmp = icmp slt i32 %i, %n
565  br i1 %cmp, label %for.body, label %if.end
566
567for.body:
568  %iprom = sext i32 %i to i64
569  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
570  store i16 0, i16* %b, align 4
571  %inc = add nsw i32 %i, 1
572  %cmp2 = icmp slt i32 %i, 2096
573  br i1 %cmp2, label %for.cond, label %if.end2
574
575if.end:
576  ret i32 0
577
578if.end2:
579  ret i32 1
580}
581
582; LCSSA, common value each exit
; Distinct exit blocks as in @multiple_exit_blocks (for.cond exits to if.end,
; for.body exits to if.end2), but both exit blocks return %i, so the same loop
; value is live out on each exit.
; Expected output: both invocations leave the loop scalar; the only difference
; from the input visible in the assertions is a single-entry phi in each exit
; block feeding its return.
583define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
584; CHECK-LABEL: @multiple_exit_blocks2(
585; CHECK-NEXT:  entry:
586; CHECK-NEXT:    br label [[FOR_COND:%.*]]
587; CHECK:       for.cond:
588; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
589; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
590; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
591; CHECK:       for.body:
592; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
593; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
594; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
595; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
596; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
597; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
598; CHECK:       if.end:
599; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
600; CHECK-NEXT:    ret i32 [[I_LCSSA]]
601; CHECK:       if.end2:
602; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
603; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
604;
605; TAILFOLD-LABEL: @multiple_exit_blocks2(
606; TAILFOLD-NEXT:  entry:
607; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
608; TAILFOLD:       for.cond:
609; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
610; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
611; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
612; TAILFOLD:       for.body:
613; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
614; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
615; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
616; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
617; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
618; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
619; TAILFOLD:       if.end:
620; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
621; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
622; TAILFOLD:       if.end2:
623; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
624; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
625;
626entry:
627  br label %for.cond
628
629for.cond:
630  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
631  %cmp = icmp slt i32 %i, %n
632  br i1 %cmp, label %for.body, label %if.end
633
634for.body:
635  %iprom = sext i32 %i to i64
636  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
637  store i16 0, i16* %b, align 4
638  %inc = add nsw i32 %i, 1
639  %cmp2 = icmp slt i32 %i, 2096
640  br i1 %cmp2, label %for.cond, label %if.end2
641
642if.end:
643  ret i32 %i
644
645if.end2:
646  ret i32 %i
647}
648
649; LCSSA, distinct value each exit
; The loop has two exiting blocks, each with its own exit block: for.cond
; leaves to if.end, which returns %i, and for.body leaves to if.end2, which
; returns %inc.  The assertions for both RUN configurations expect the loop
; to remain scalar, with one LCSSA phi created in each exit block.
650define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
651; CHECK-LABEL: @multiple_exit_blocks3(
652; CHECK-NEXT:  entry:
653; CHECK-NEXT:    br label [[FOR_COND:%.*]]
654; CHECK:       for.cond:
655; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
656; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
657; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
658; CHECK:       for.body:
659; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
660; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
661; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
662; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
663; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
664; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
665; CHECK:       if.end:
666; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
667; CHECK-NEXT:    ret i32 [[I_LCSSA]]
668; CHECK:       if.end2:
669; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
670; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
671;
672; TAILFOLD-LABEL: @multiple_exit_blocks3(
673; TAILFOLD-NEXT:  entry:
674; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
675; TAILFOLD:       for.cond:
676; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
677; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
678; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
679; TAILFOLD:       for.body:
680; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
681; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
682; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
683; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
684; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
685; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
686; TAILFOLD:       if.end:
687; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
688; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
689; TAILFOLD:       if.end2:
690; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
691; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
692;
693entry:
694  br label %for.cond
695
; Loop header and first exiting block: leaves to if.end once %i is no longer
; signed-less-than %n.
696for.cond:
697  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
698  %cmp = icmp slt i32 %i, %n
699  br i1 %cmp, label %for.body, label %if.end
700
; Latch and second exiting block: stores a zero i16 at p[%i], then leaves to
; if.end2 once %i is no longer signed-less-than 2096.
701for.body:
702  %iprom = sext i32 %i to i64
703  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
704  store i16 0, i16* %b, align 4
705  %inc = add nsw i32 %i, 1
706  %cmp2 = icmp slt i32 %i, 2096
707  br i1 %cmp2, label %for.cond, label %if.end2
708
; Live-out on this exit is the induction variable itself.
709if.end:
710  ret i32 %i
711
; Live-out on this exit is the incremented value, unlike if.end.
712if.end2:
713  ret i32 %inc
714}
715
716; unique exit case but with a switch as two edges between the same pair of
717; blocks is an often missed edge case
; Both switch cases (2096 and 2097) target if.end, so the single exit block
; has two incoming edges from for.cond; the expected LCSSA phi in if.end
; therefore lists for.cond twice.  The assertions for both RUN configurations
; expect the loop to remain scalar.
718define i32 @multiple_exit_switch(i16* %p, i32 %n) {
719; CHECK-LABEL: @multiple_exit_switch(
720; CHECK-NEXT:  entry:
721; CHECK-NEXT:    br label [[FOR_COND:%.*]]
722; CHECK:       for.cond:
723; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
724; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
725; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
726; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
727; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
728; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
729; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
730; CHECK-NEXT:    i32 2097, label [[IF_END]]
731; CHECK-NEXT:    ]
732; CHECK:       if.end:
733; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
734; CHECK-NEXT:    ret i32 [[I_LCSSA]]
735;
736; TAILFOLD-LABEL: @multiple_exit_switch(
737; TAILFOLD-NEXT:  entry:
738; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
739; TAILFOLD:       for.cond:
740; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
741; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
742; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
743; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
744; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
745; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
746; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
747; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
748; TAILFOLD-NEXT:    ]
749; TAILFOLD:       if.end:
750; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
751; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
752;
753entry:
754  br label %for.cond
755
; Single-block loop: for.cond is header, latch and the only exiting block.
; The switch default is the back edge; both listed cases leave the loop.
756for.cond:
757  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
758  %iprom = sext i32 %i to i64
759  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
760  store i16 0, i16* %b, align 4
761  %inc = add nsw i32 %i, 1
762  switch i32 %i, label %for.cond [
763  i32 2096, label %if.end
764  i32 2097, label %if.end
765  ]
766
767if.end:
768  ret i32 %i
769}
770
771; multiple exit case but with a switch as multiple exiting edges from
772; a single block is a commonly missed edge case
; Here the two switch cases go to different exit blocks (2096 to if.end,
; 2097 to if.end2), so one exiting block feeds two exit blocks.  Each exit
; returns a constant, so no loop value is live-out.  The assertions for both
; RUN configurations expect the loop to remain scalar.
773define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
774; CHECK-LABEL: @multiple_exit_switch2(
775; CHECK-NEXT:  entry:
776; CHECK-NEXT:    br label [[FOR_COND:%.*]]
777; CHECK:       for.cond:
778; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
779; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
780; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
781; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
782; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
783; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
784; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
785; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
786; CHECK-NEXT:    ]
787; CHECK:       if.end:
788; CHECK-NEXT:    ret i32 0
789; CHECK:       if.end2:
790; CHECK-NEXT:    ret i32 1
791;
792; TAILFOLD-LABEL: @multiple_exit_switch2(
793; TAILFOLD-NEXT:  entry:
794; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
795; TAILFOLD:       for.cond:
796; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
797; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
798; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
799; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
800; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
801; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
802; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
803; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
804; TAILFOLD-NEXT:    ]
805; TAILFOLD:       if.end:
806; TAILFOLD-NEXT:    ret i32 0
807; TAILFOLD:       if.end2:
808; TAILFOLD-NEXT:    ret i32 1
809;
810entry:
811  br label %for.cond
812
; Single-block loop: the switch default is the back edge, and each listed
; case leaves the loop through a different exit block.
813for.cond:
814  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
815  %iprom = sext i32 %i to i64
816  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
817  store i16 0, i16* %b, align 4
818  %inc = add nsw i32 %i, 1
819  switch i32 %i, label %for.cond [
820  i32 2096, label %if.end
821  i32 2097, label %if.end2
822  ]
823
824if.end:
825  ret i32 0
826
827if.end2:
828  ret i32 1
829}
830
; Loop in which two blocks (for.body and for.second) branch to a dedicated
; for.body.backedge block, which is the only predecessor of the header from
; inside the loop.  The sole loop exit is from for.second to for.end.  The
; assertions for both RUN configurations expect the loop to remain scalar.
831define i32 @multiple_latch1(i16* %p) {
832; CHECK-LABEL: @multiple_latch1(
833; CHECK-NEXT:  entry:
834; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
835; CHECK:       for.body:
836; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
837; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
838; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
839; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
840; CHECK:       for.second:
841; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
842; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
843; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
844; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
845; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
846; CHECK:       for.body.backedge:
847; CHECK-NEXT:    br label [[FOR_BODY]]
848; CHECK:       for.end:
849; CHECK-NEXT:    ret i32 0
850;
851; TAILFOLD-LABEL: @multiple_latch1(
852; TAILFOLD-NEXT:  entry:
853; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
854; TAILFOLD:       for.body:
855; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
856; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
857; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
858; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
859; TAILFOLD:       for.second:
860; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
861; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
862; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
863; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
864; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
865; TAILFOLD:       for.body.backedge:
866; TAILFOLD-NEXT:    br label [[FOR_BODY]]
867; TAILFOLD:       for.end:
868; TAILFOLD-NEXT:    ret i32 0
869;
870entry:
871  br label %for.body
872
; Header: continues straight to the backedge block while %inc < 16 (signed),
; otherwise falls into for.second.
873for.body:
874  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
875  %inc = add nsw i32 %i.02, 1
876  %cmp = icmp slt i32 %inc, 16
877  br i1 %cmp, label %for.body.backedge, label %for.second
878
; Stores a zero i16 at p[%i.02], then either loops again (%inc > 16, signed)
; or leaves the loop to for.end.
879for.second:
880  %iprom = sext i32 %i.02 to i64
881  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
882  store i16 0, i16* %b, align 4
883  %cmps = icmp sgt i32 %inc, 16
884  br i1 %cmps, label %for.body.backedge, label %for.end
885
; Dedicated block carrying the single back edge to the header.
886for.body.backedge:
887  br label %for.body
888
889for.end:
890  ret i32 0
891}
892
893
894; two back branches - loop simplify will convert this to the same form
895; as previous before vectorizer sees it, but show that.
; In the input, both for.body and for.second branch directly back to the
; header for.body.  The expected output contains a for.body.backedge block
; that the input does not have, and the loop remains scalar for both RUN
; configurations.
896define i32 @multiple_latch2(i16* %p) {
897; CHECK-LABEL: @multiple_latch2(
898; CHECK-NEXT:  entry:
899; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
900; CHECK:       for.body:
901; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
902; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
903; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
904; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
905; CHECK:       for.body.backedge:
906; CHECK-NEXT:    br label [[FOR_BODY]]
907; CHECK:       for.second:
908; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
909; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
910; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
911; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
912; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
913; CHECK:       for.end:
914; CHECK-NEXT:    ret i32 0
915;
916; TAILFOLD-LABEL: @multiple_latch2(
917; TAILFOLD-NEXT:  entry:
918; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
919; TAILFOLD:       for.body:
920; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
921; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
922; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
923; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
924; TAILFOLD:       for.body.backedge:
925; TAILFOLD-NEXT:    br label [[FOR_BODY]]
926; TAILFOLD:       for.second:
927; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
928; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
929; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
930; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
931; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
932; TAILFOLD:       for.end:
933; TAILFOLD-NEXT:    ret i32 0
934;
935entry:
936  br label %for.body
937
; Header with two in-loop predecessors: itself and for.second.  The phi
; receives %inc along both back edges.
938for.body:
939  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
940  %inc = add nsw i32 %i.02, 1
941  %cmp = icmp slt i32 %inc, 16
942  br i1 %cmp, label %for.body, label %for.second
943
; Stores a zero i16 at p[%i.02], then either takes the second back edge
; (%inc > 16, signed) or leaves the loop to for.end.
944for.second:
945  %iprom = sext i32 %i.02 to i64
946  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
947  store i16 0, i16* %b, align 4
948  %cmps = icmp sgt i32 %inc, 16
949  br i1 %cmps, label %for.body, label %for.end
950
951for.end:
952  ret i32 0
953}
954
955
956; Check interaction between block predication and early exits.  We need the
957; condition on the early exit to remain dead (i.e. not be used when forming
958; the predicate mask).
; The loop exits from its header when %iv equals 200 and stores 10.0 to
; addr[%iv] only when the loaded float does not compare ordered-equal to 0.0.
; With the first RUN line the expected output is a width-2 vector loop whose
; store is scalarized behind per-lane branches; the lane mask is the inverted
; vector fcmp and does not involve the exit compare.  The scalar loop is kept
; after the vector loop.  With the predicate-dont-vectorize RUN line the
; expected output is the unchanged scalar loop.
959define void @scalar_predication(float* %addr) {
960; CHECK-LABEL: @scalar_predication(
961; CHECK-NEXT:  entry:
962; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
963; CHECK:       vector.ph:
964; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
965; CHECK:       vector.body:
966; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
967; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
968; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
969; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
970; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
971; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
972; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
973; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
974; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
975; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
976; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
977; CHECK:       pred.store.if:
978; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP1]], align 4
979; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
980; CHECK:       pred.store.continue:
981; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
982; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
983; CHECK:       pred.store.if1:
984; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
985; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]]
986; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP9]], align 4
987; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
988; CHECK:       pred.store.continue2:
989; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
990; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
991; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
992; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
993; CHECK:       middle.block:
994; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 201, 200
995; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
996; CHECK:       scalar.ph:
997; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
998; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
999; CHECK:       loop.header:
1000; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1001; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
1002; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1003; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY:%.*]]
1004; CHECK:       loop.body:
1005; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[GEP]], align 4
1006; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
1007; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1008; CHECK:       then:
1009; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1010; CHECK-NEXT:    br label [[LOOP_LATCH]]
1011; CHECK:       loop.latch:
1012; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1013; CHECK-NEXT:    br label [[LOOP_HEADER]], [[LOOP13:!llvm.loop !.*]]
1014; CHECK:       exit:
1015; CHECK-NEXT:    ret void
1016;
1017; TAILFOLD-LABEL: @scalar_predication(
1018; TAILFOLD-NEXT:  entry:
1019; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1020; TAILFOLD:       loop.header:
1021; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1022; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
1023; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1024; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1025; TAILFOLD:       loop.body:
1026; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
1027; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
1028; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1029; TAILFOLD:       then:
1030; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1031; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
1032; TAILFOLD:       loop.latch:
1033; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1034; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1035; TAILFOLD:       exit:
1036; TAILFOLD-NEXT:    ret void
1037;
1038entry:
1039  br label %loop.header
1040
; Header and only exiting block: the exit test runs before the loop body.
1041loop.header:
1042  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1043  %gep = getelementptr float, float* %addr, i64 %iv
1044  %exitcond.not = icmp eq i64 %iv, 200
1045  br i1 %exitcond.not, label %exit, label %loop.body
1046
; Skips the store when the loaded element compares ordered-equal to 0.0.
1047loop.body:
1048  %0 = load float, float* %gep, align 4
1049  %pred = fcmp oeq float %0, 0.0
1050  br i1 %pred, label %loop.latch, label %then
1051
; Conditionally executed block; this store is what needs predication.
1052then:
1053  store float 10.0, float* %gep, align 4
1054  br label %loop.latch
1055
; The latch branches back unconditionally; it is not an exiting block.
1056loop.latch:
1057  %iv.next = add nuw nsw i64 %iv, 1
1058  br label %loop.header
1059
1060exit:
1061  ret void
1062}
1063
; Integer add reduction in a loop with two exiting blocks (header and latch)
; that share the single exit block.  The exit phi receives the constant 0
; from the header exit and %accum.next from the latch exit.  With the first
; RUN line the expected output is a width-2 vector loop whose partial sums are
; combined by llvm.vector.reduce.add in the middle block, followed by the
; retained scalar loop.  With the predicate-dont-vectorize RUN line the
; expected output is the unchanged scalar loop.
1064define i32 @me_reduction(i32* %addr) {
1065; CHECK-LABEL: @me_reduction(
1066; CHECK-NEXT:  entry:
1067; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1068; CHECK:       vector.ph:
1069; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1070; CHECK:       vector.body:
1071; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1072; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1073; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
1074; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1075; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1076; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
1077; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
1078; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
1079; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
1080; CHECK-NEXT:    [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
1081; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
1082; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1083; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1084; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]]
1085; CHECK:       middle.block:
1086; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]])
1087; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 201, 200
1088; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1089; CHECK:       scalar.ph:
1090; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1091; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1092; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1093; CHECK:       loop.header:
1094; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1095; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1096; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
1097; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1098; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]]
1099; CHECK:       loop.latch:
1100; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
1101; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
1102; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1103; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1104; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], [[LOOP15:!llvm.loop !.*]]
1105; CHECK:       exit:
1106; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1107; CHECK-NEXT:    ret i32 [[LCSSA]]
1108;
1109; TAILFOLD-LABEL: @me_reduction(
1110; TAILFOLD-NEXT:  entry:
1111; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1112; TAILFOLD:       loop.header:
1113; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1114; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1115; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1116; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1117; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1118; TAILFOLD:       loop.latch:
1119; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1120; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1121; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1122; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1123; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1124; TAILFOLD:       exit:
1125; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1126; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1127;
1128entry:
1129  br label %loop.header
1130
; First exiting block: leaves the loop when %iv equals 200, before the load.
1131loop.header:
1132  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1133  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1134  %gep = getelementptr i32, i32* %addr, i64 %iv
1135  %exitcond.not = icmp eq i64 %iv, 200
1136  br i1 %exitcond.not, label %exit, label %loop.latch
1137
; Second exiting block: accumulates addr[%iv] and leaves when %iv equals 400.
1138loop.latch:
1139  %0 = load i32, i32* %gep, align 4
1140  %accum.next = add i32 %accum, %0
1141  %iv.next = add nuw nsw i64 %iv, 1
1142  %exitcond2.not = icmp eq i64 %iv, 400
1143  br i1 %exitcond2.not, label %exit, label %loop.header
1144
; Shared exit block: the result depends on which exit was taken.
1145exit:
1146  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
1147  ret i32 %lcssa
1148}
1149
1150; TODO: The current definition of reduction is too strict, we can vectorize
1151; this.  There's an analogous single exit case where we extract the N-1
1152; value of the reduction that we can also handle.  If we fix the latter, the
1153; multiple exit case probably falls out.
; The value returned is the header phi %accum, not the updated %accum.next.
; The assertions for both RUN configurations currently expect the loop to
; remain scalar.
1154define i32 @me_reduction2(i32* %addr) {
1155; CHECK-LABEL: @me_reduction2(
1156; CHECK-NEXT:  entry:
1157; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1158; CHECK:       loop.header:
1159; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1160; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1161; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1162; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1163; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1164; CHECK:       loop.latch:
1165; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1166; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1167; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1168; CHECK-NEXT:    br label [[LOOP_HEADER]]
1169; CHECK:       exit:
1170; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1171; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1172;
1173; TAILFOLD-LABEL: @me_reduction2(
1174; TAILFOLD-NEXT:  entry:
1175; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1176; TAILFOLD:       loop.header:
1177; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1178; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1179; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1180; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1181; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1182; TAILFOLD:       loop.latch:
1183; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1184; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1185; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1186; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1187; TAILFOLD:       exit:
1188; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1189; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1190;
1191entry:
1192  br label %loop.header
1193
; Header and only exiting block: leaves the loop when %iv equals 200.
1194loop.header:
1195  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1196  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1197  %gep = getelementptr i32, i32* %addr, i64 %iv
1198  %exitcond.not = icmp eq i64 %iv, 200
1199  br i1 %exitcond.not, label %exit, label %loop.latch
1200
; Accumulates addr[%iv] and branches back unconditionally.
1201loop.latch:
1202  %0 = load i32, i32* %gep, align 4
1203  %accum.next = add i32 %accum, %0
1204  %iv.next = add nuw nsw i64 %iv, 1
1205  br label %loop.header
1206
; Returns the accumulator as it was on entry to the final header visit.
1207exit:
1208  ret i32 %accum
1209}
1210
1211