xref: /llvm-project/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll (revision 7f3428d3ed71d87a2088b77b6cab9f3d86544234)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
3
4
5@p = external local_unnamed_addr global [257 x i32], align 16
6@q = external local_unnamed_addr global [257 x i32], align 16
7
8; Test case for PR43398.
9
10define void @can_sink_after_store(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr #0 {
11; CHECK-LABEL: @can_sink_after_store(
12; CHECK-NEXT:  entry:
13; CHECK-NEXT:    br label [[PREHEADER:%.*]]
14; CHECK:       preheader:
15; CHECK-NEXT:    [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
16; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
17; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
18; CHECK:       vector.ph:
19; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
20; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
21; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
22; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
23; CHECK:       vector.body:
24; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
25; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
26; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
27; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
28; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[TMP0]]
29; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
30; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP2]], align 4
31; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
32; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
33; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_LOAD]]
34; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[TMP0]]
35; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
36; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
37; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
38; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1996
39; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
40; CHECK:       middle.block:
41; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
42; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
43; CHECK:       scalar.ph:
44; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ]
45; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
46; CHECK-NEXT:    br label [[FOR:%.*]]
47; CHECK:       for:
48; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
49; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
50; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[SCALAR_RECUR]], [[X]]
51; CHECK-NEXT:    [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
52; CHECK-NEXT:    [[PRE_NEXT]] = load i32, ptr [[IDX_1]], align 4
53; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD_1]], [[PRE_NEXT]]
54; CHECK-NEXT:    [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
55; CHECK-NEXT:    store i32 [[ADD_2]], ptr [[IDX_2]], align 4
56; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
57; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
58; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR]], !llvm.loop [[LOOP3:![0-9]+]]
59; CHECK:       exit:
60; CHECK-NEXT:    ret void
61;
62
63entry:
64  br label %preheader
65
66preheader:
67  %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
68  %.pre = load i32, ptr %idx.phi.trans, align 4
69  br label %for
70
71for:
72  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
73  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
74  %add.1 = add i32 %pre.phi, %x
75  %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
76  %pre.next = load i32, ptr %idx.1, align 4
77  %add.2 = add i32 %add.1, %pre.next
78  %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
79  store i32 %add.2, ptr %idx.2, align 4
80  %iv.next = add nuw nsw i64 %iv, 1
81  %exitcond = icmp eq i64 %iv.next, 2000
82  br i1 %exitcond, label %exit, label %for
83
84exit:
85  ret void
86}
87
88; We can sink potential trapping instructions, as this will only delay the trap
89; and not introduce traps on additional paths.
90define void @sink_sdiv(i32 %x, ptr %ptr, i64 %tc) local_unnamed_addr #0 {
91; CHECK-LABEL: @sink_sdiv(
92; CHECK-NEXT:  entry:
93; CHECK-NEXT:    br label [[PREHEADER:%.*]]
94; CHECK:       preheader:
95; CHECK-NEXT:    [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
96; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
97; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
98; CHECK:       vector.ph:
99; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
100; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
101; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
102; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
103; CHECK:       vector.body:
104; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
105; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
106; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
107; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
108; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[TMP0]]
109; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
110; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP2]], align 4
111; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
112; CHECK-NEXT:    [[TMP4:%.*]] = sdiv <4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
113; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_LOAD]]
114; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[TMP0]]
115; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
116; CHECK-NEXT:    store <4 x i32> [[TMP5]], ptr [[TMP7]], align 4
117; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
118; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1996
119; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
120; CHECK:       middle.block:
121; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
122; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
123; CHECK:       scalar.ph:
124; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ]
125; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
126; CHECK-NEXT:    br label [[FOR:%.*]]
127; CHECK:       for:
128; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
129; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
130; CHECK-NEXT:    [[DIV_1:%.*]] = sdiv i32 [[SCALAR_RECUR]], [[X]]
131; CHECK-NEXT:    [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
132; CHECK-NEXT:    [[PRE_NEXT]] = load i32, ptr [[IDX_1]], align 4
133; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[DIV_1]], [[PRE_NEXT]]
134; CHECK-NEXT:    [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
135; CHECK-NEXT:    store i32 [[ADD_2]], ptr [[IDX_2]], align 4
136; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
137; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
138; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR]], !llvm.loop [[LOOP5:![0-9]+]]
139; CHECK:       exit:
140; CHECK-NEXT:    ret void
141;
142
143entry:
144  br label %preheader
145
146preheader:
147  %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
148  %.pre = load i32, ptr %idx.phi.trans, align 4
149  br label %for
150
151for:
152  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
153  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
154  %div.1 = sdiv i32 %pre.phi, %x
155  %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
156  %pre.next = load i32, ptr %idx.1, align 4
157  %add.2 = add i32 %div.1, %pre.next
158  %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
159  store i32 %add.2, ptr %idx.2, align 4
160  %iv.next = add nuw nsw i64 %iv, 1
161  %exitcond = icmp eq i64 %iv.next, 2000
162  br i1 %exitcond, label %exit, label %for
163
164exit:
165  ret void
166}
167
168; Sink users of %pre.phi recursively.
169define void @can_sink_with_additional_user(i32 %x, ptr %ptr, i64 %tc) {
170; CHECK-LABEL: @can_sink_with_additional_user(
171; CHECK-NEXT:  entry:
172; CHECK-NEXT:    br label [[PREHEADER:%.*]]
173; CHECK:       preheader:
174; CHECK-NEXT:    [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
175; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
176; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
177; CHECK:       vector.ph:
178; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
179; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
180; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
181; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
182; CHECK:       vector.body:
183; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
184; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
185; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
186; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
187; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[TMP0]]
188; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
189; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, ptr [[TMP2]], align 4
190; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
191; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
192; CHECK-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
193; CHECK-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_LOAD]]
194; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[TMP5]], [[TMP6]]
195; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[TMP0]]
196; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0
197; CHECK-NEXT:    store <4 x i32> [[TMP7]], ptr [[TMP9]], align 4
198; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
199; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1996
200; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
201; CHECK:       middle.block:
202; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
203; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
204; CHECK:       scalar.ph:
205; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ [[DOTPRE]], [[PREHEADER]] ]
206; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1997, [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ]
207; CHECK-NEXT:    br label [[FOR:%.*]]
208; CHECK:       for:
209; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
210; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
211; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[SCALAR_RECUR]], [[X]]
212; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD_1]], [[X]]
213; CHECK-NEXT:    [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
214; CHECK-NEXT:    [[PRE_NEXT]] = load i32, ptr [[IDX_1]], align 4
215; CHECK-NEXT:    [[ADD_3:%.*]] = add i32 [[ADD_1]], [[PRE_NEXT]]
216; CHECK-NEXT:    [[ADD_4:%.*]] = add i32 [[ADD_2]], [[ADD_3]]
217; CHECK-NEXT:    [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
218; CHECK-NEXT:    store i32 [[ADD_4]], ptr [[IDX_2]], align 4
219; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
220; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
221; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR]], !llvm.loop [[LOOP7:![0-9]+]]
222; CHECK:       exit:
223; CHECK-NEXT:    ret void
224;
225
226
227
228entry:
229  br label %preheader
230
231preheader:
232  %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
233  %.pre = load i32, ptr %idx.phi.trans, align 4
234  br label %for
235
236for:
237  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
238  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
239  %add.1 = add i32 %pre.phi, %x
240  %add.2 = add i32 %add.1, %x
241  %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
242  %pre.next = load i32, ptr %idx.1, align 4
243  %add.3 = add i32 %add.1, %pre.next
244  %add.4 = add i32 %add.2, %add.3
245  %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
246  store i32 %add.4, ptr %idx.2, align 4
247  %iv.next = add nuw nsw i64 %iv, 1
248  %exitcond = icmp eq i64 %iv.next, 2000
249  br i1 %exitcond, label %exit, label %for
250
251exit:
252  ret void
253}
254
255; FIXME: We can sink a store, if we can guarantee that it does not alias any
256;        loads/stores in between.
257define void @cannot_sink_store(i32 %x, ptr %ptr, i64 %tc) {
258; CHECK-LABEL: @cannot_sink_store(
259; CHECK-NEXT:  entry:
260; CHECK-NEXT:    br label [[PREHEADER:%.*]]
261; CHECK:       preheader:
262; CHECK-NEXT:    [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
263; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
264; CHECK-NEXT:    br label [[FOR:%.*]]
265; CHECK:       for:
266; CHECK-NEXT:    [[PRE_PHI:%.*]] = phi i32 [ [[DOTPRE]], [[PREHEADER]] ], [ [[PRE_NEXT:%.*]], [[FOR]] ]
267; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 1, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
268; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[PRE_PHI]], [[X:%.*]]
269; CHECK-NEXT:    store i32 [[ADD_1]], ptr [[PTR:%.*]], align 4
270; CHECK-NEXT:    [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
271; CHECK-NEXT:    [[PRE_NEXT]] = load i32, ptr [[IDX_1]], align 4
272; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[ADD_1]], [[PRE_NEXT]]
273; CHECK-NEXT:    [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
274; CHECK-NEXT:    store i32 [[ADD_2]], ptr [[IDX_2]], align 4
275; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
276; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
277; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR]]
278; CHECK:       exit:
279; CHECK-NEXT:    ret void
280;
281
282
283
284entry:
285  br label %preheader
286
287preheader:
288  %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
289  %.pre = load i32, ptr %idx.phi.trans, align 4
290  br label %for
291
292for:
293  %pre.phi = phi i32 [ %.pre, %preheader ], [ %pre.next, %for ]
294  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
295  %add.1 = add i32 %pre.phi, %x
296  store i32 %add.1, ptr %ptr
297  %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
298  %pre.next = load i32, ptr %idx.1, align 4
299  %add.2 = add i32 %add.1, %pre.next
300  %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
301  store i32 %add.2, ptr %idx.2, align 4
302  %iv.next = add nuw nsw i64 %iv, 1
303  %exitcond = icmp eq i64 %iv.next, 2000
304  br i1 %exitcond, label %exit, label %for
305
306exit:
307  ret void
308}
309
310; Some kinds of reductions are not detected by IVDescriptors. If we have a
311; cycle, we cannot sink it.
312define void @cannot_sink_reduction(i32 %x, ptr %ptr, i64 %tc) {
313; CHECK-LABEL: @cannot_sink_reduction(
314; CHECK-NEXT:  entry:
315; CHECK-NEXT:    br label [[PREHEADER:%.*]]
316; CHECK:       preheader:
317; CHECK-NEXT:    [[IDX_PHI_TRANS:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
318; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, ptr [[IDX_PHI_TRANS]], align 4
319; CHECK-NEXT:    br label [[FOR:%.*]]
320; CHECK:       for:
321; CHECK-NEXT:    [[PRE_PHI:%.*]] = phi i32 [ [[DOTPRE]], [[PREHEADER]] ], [ [[D:%.*]], [[FOR]] ]
322; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 1, [[PREHEADER]] ], [ [[IV_NEXT:%.*]], [[FOR]] ]
323; CHECK-NEXT:    [[D]] = sdiv i32 [[PRE_PHI]], [[X:%.*]]
324; CHECK-NEXT:    [[IDX_1:%.*]] = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 [[IV]]
325; CHECK-NEXT:    [[PRE_NEXT:%.*]] = load i32, ptr [[IDX_1]], align 4
326; CHECK-NEXT:    [[ADD_2:%.*]] = add i32 [[X]], [[PRE_NEXT]]
327; CHECK-NEXT:    [[IDX_2:%.*]] = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 [[IV]]
328; CHECK-NEXT:    store i32 [[ADD_2]], ptr [[IDX_2]], align 4
329; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
330; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 2000
331; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[FOR]]
332; CHECK:       exit:
333; CHECK-NEXT:    ret void
334;
335entry:
336  br label %preheader
337
338preheader:
339  %idx.phi.trans = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 1
340  %.pre = load i32, ptr %idx.phi.trans, align 4
341  br label %for
342
343for:
344  %pre.phi = phi i32 [ %.pre, %preheader ], [ %d, %for ]
345  %iv = phi i64 [ 1, %preheader ], [ %iv.next, %for ]
346  %d = sdiv i32 %pre.phi, %x
347  %idx.1 = getelementptr inbounds [257 x i32], ptr @p, i64 0, i64 %iv
348  %pre.next = load i32, ptr %idx.1, align 4
349  %add.2 = add i32 %x, %pre.next
350  %idx.2 = getelementptr inbounds [257 x i32], ptr @q, i64 0, i64 %iv
351  store i32 %add.2, ptr %idx.2, align 4
352  %iv.next = add nuw nsw i64 %iv, 1
353  %exitcond = icmp eq i64 %iv.next, 2000
354  br i1 %exitcond, label %exit, label %for
355
356exit:
357  ret void
358}
359
360; Sink %tmp38 after %tmp60, then it enable the loop vectorization.
361define void @instruction_with_2_FOR_operands(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
362; CHECK-LABEL: @instruction_with_2_FOR_operands(
363; CHECK-NEXT:  bb:
364; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
365; CHECK:       vector.ph:
366; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
367; CHECK:       vector.body:
368; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
369; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT3:%.*]], [[VECTOR_BODY]] ]
370; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
371; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
372; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP0]]
373; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[A:%.*]], align 4
374; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP2]], i64 0
375; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
376; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
377; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr [[B:%.*]], align 4
378; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP4]], i64 0
379; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
380; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
381; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], [[TMP3]]
382; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
383; CHECK-NEXT:    store <4 x float> [[TMP6]], ptr [[TMP7]], align 4
384; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
385; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
386; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
387; CHECK:       middle.block:
388; CHECK-NEXT:    br i1 false, label [[BB74:%.*]], label [[SCALAR_PH]]
389; CHECK:       scalar.ph:
390; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ]
391; CHECK-NEXT:    [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[BB]] ]
392; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
393; CHECK-NEXT:    br label [[BB13:%.*]]
394; CHECK:       bb13:
395; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi float [ [[TMP60:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
396; CHECK-NEXT:    [[SCALAR_RECUR5:%.*]] = phi float [ [[TMP49:%.*]], [[BB13]] ], [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ]
397; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BB13]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
398; CHECK-NEXT:    [[TMP38:%.*]] = fmul fast float [[SCALAR_RECUR]], [[SCALAR_RECUR5]]
399; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
400; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[IV]]
401; CHECK-NEXT:    [[TMP49]] = load float, ptr [[A]], align 4
402; CHECK-NEXT:    [[TMP60]] = load float, ptr [[B]], align 4
403; CHECK-NEXT:    store float [[TMP38]], ptr [[GEP]], align 4
404; CHECK-NEXT:    [[TMP12:%.*]] = icmp slt i64 [[IV]], 1000
405; CHECK-NEXT:    br i1 [[TMP12]], label [[BB13]], label [[BB74]], !llvm.loop [[LOOP9:![0-9]+]]
406; CHECK:       bb74:
407; CHECK-NEXT:    ret void
408;
409bb:
410  br label %bb13
411
412bb13:                                             ; preds = %bb13, %bb
413  %tmp37 = phi float [ %tmp60, %bb13 ], [ 0.0, %bb ]
414  %tmp27 = phi float [ %tmp49, %bb13 ], [ 1.0, %bb ]
415  %iv = phi i64 [ %iv.next, %bb13 ], [ 0, %bb ]
416  %tmp38 = fmul fast float %tmp37, %tmp27
417  %iv.next = add nuw nsw i64 %iv, 1
418  %gep = getelementptr inbounds float, ptr %C, i64 %iv
419  %tmp49 = load float, ptr %A, align 4
420  %tmp60 = load float, ptr %B, align 4
421  store float %tmp38, ptr %gep
422  %tmp12 = icmp slt i64 %iv, 1000
423  br i1 %tmp12, label %bb13, label %bb74
424
425bb74:                                             ; preds = %bb13
426  ret void
427}
428
429define void @instruction_with_2_FOR_operands_and_multiple_other_uses(ptr noalias %dst.1, ptr noalias %dst.2, ptr noalias %dst.3, ptr noalias %for.ptr.1, ptr noalias %for.ptr.2) {
430; CHECK-LABEL: @instruction_with_2_FOR_operands_and_multiple_other_uses(
431; CHECK-NEXT:  bb:
432; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
433; CHECK:       vector.ph:
434; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
435; CHECK:       vector.body:
436; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
437; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT3:%.*]], [[VECTOR_BODY]] ]
438; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
439; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
440; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[FOR_PTR_2:%.*]], align 4
441; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
442; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
443; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
444; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[FOR_PTR_1:%.*]], align 4
445; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
446; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
447; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
448; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00)
449; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[TMP4]], [[TMP2]]
450; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00)
451; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]]
452; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 0
453; CHECK-NEXT:    store <4 x float> [[TMP6]], ptr [[TMP9]], align 4
454; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[DST_2:%.*]], i64 [[TMP0]]
455; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 0
456; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[TMP11]], align 4
457; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[DST_3:%.*]], i64 [[TMP0]]
458; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
459; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[TMP13]], align 4
460; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
461; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
462; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
463; CHECK:       middle.block:
464; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
465; CHECK:       scalar.ph:
466; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ]
467; CHECK-NEXT:    [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ]
468; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
469; CHECK-NEXT:    br label [[LOOP:%.*]]
470; CHECK:       loop:
471; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
472; CHECK-NEXT:    [[SCALAR_RECUR5:%.*]] = phi float [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
473; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
474; CHECK-NEXT:    [[FOR_1_USE_1:%.*]] = fmul fast float [[SCALAR_RECUR]], 2.000000e+00
475; CHECK-NEXT:    [[USED_BY_BOTH:%.*]] = fmul fast float [[SCALAR_RECUR]], [[SCALAR_RECUR5]]
476; CHECK-NEXT:    [[FOR_2_NEXT]] = load float, ptr [[FOR_PTR_2]], align 4
477; CHECK-NEXT:    [[FOR_1_USE_3:%.*]] = fadd fast float [[SCALAR_RECUR]], 1.000000e+00
478; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
479; CHECK-NEXT:    [[FOR_1_NEXT]] = load float, ptr [[FOR_PTR_1]], align 4
480; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST_1]], i64 [[IV]]
481; CHECK-NEXT:    store float [[USED_BY_BOTH]], ptr [[GEP_DST_1]], align 4
482; CHECK-NEXT:    [[GEP_DST_2:%.*]] = getelementptr inbounds float, ptr [[DST_2]], i64 [[IV]]
483; CHECK-NEXT:    store float [[FOR_1_USE_1]], ptr [[GEP_DST_2]], align 4
484; CHECK-NEXT:    [[GEP_DST_3:%.*]] = getelementptr inbounds float, ptr [[DST_3]], i64 [[IV]]
485; CHECK-NEXT:    store float [[FOR_1_USE_3]], ptr [[GEP_DST_3]], align 4
486; CHECK-NEXT:    [[EC:%.*]] = icmp slt i64 [[IV]], 1000
487; CHECK-NEXT:    br i1 [[EC]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
488; CHECK:       exit:
489; CHECK-NEXT:    ret void
490;
491bb:
492  br label %loop
493
494loop:
495  %for.1 = phi float [ 0.0, %bb ], [ %for.1.next, %loop]
496  %for.2 = phi float [ 0.0, %bb ], [ %for.2.next, %loop]
497  %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
498  %for.1.use.1  = fmul fast float %for.1, 2.0
499  %used.by.both = fmul fast float %for.1, %for.2
500  %for.2.next = load float, ptr %for.ptr.2, align 4
501  %for.1.use.3 = fadd fast float %for.1, 1.0
502  %iv.next = add nuw nsw i64 %iv, 1
503  %for.1.next = load float, ptr %for.ptr.1, align 4
504  %gep.dst.1 = getelementptr inbounds float, ptr %dst.1, i64 %iv
505  store float %used.by.both, ptr %gep.dst.1
506  %gep.dst.2 = getelementptr inbounds float, ptr %dst.2, i64 %iv
507  store float %for.1.use.1, ptr %gep.dst.2
508  %gep.dst.3 = getelementptr inbounds float, ptr %dst.3, i64 %iv
509  store float %for.1.use.3, ptr %gep.dst.3
510  %ec = icmp slt i64 %iv, 1000
511  br i1 %ec, label %loop, label %exit
512
513exit:
514  ret void
515}
516
517; Variation of @instruction_with_2_FOR_operands_and_multiple_other_uses, with
518; multiple instructions in a chain from for.1 to %used.by.both.
519define void @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(ptr noalias %dst.1, ptr noalias %dst.2, ptr noalias %dst.3, ptr noalias %for.ptr.1, ptr noalias %for.ptr.2) {
520; CHECK-LABEL: @instruction_with_2_FOR_operands_and_multiple_other_uses_chain(
521; CHECK-NEXT:  bb:
522; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
523; CHECK:       vector.ph:
524; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
525; CHECK:       vector.body:
526; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
527; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT3:%.*]], [[VECTOR_BODY]] ]
528; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x float> [ <float poison, float poison, float poison, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
529; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
530; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[FOR_PTR_2:%.*]], align 4
531; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0
532; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
533; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR1]], <4 x float> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
534; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[FOR_PTR_1:%.*]], align 4
535; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
536; CHECK-NEXT:    [[BROADCAST_SPLAT3]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
537; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x float> [[VECTOR_RECUR]], <4 x float> [[BROADCAST_SPLAT3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
538; CHECK-NEXT:    [[TMP5:%.*]] = fmul fast <4 x float> [[TMP4]], splat (float 2.000000e+00)
539; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[TMP5]], splat (float 2.000000e+00)
540; CHECK-NEXT:    [[TMP7:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP2]]
541; CHECK-NEXT:    [[TMP8:%.*]] = fadd fast <4 x float> [[TMP4]], splat (float 1.000000e+00)
542; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[DST_1:%.*]], i64 [[TMP0]]
543; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
544; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[TMP10]], align 4
545; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[DST_2:%.*]], i64 [[TMP0]]
546; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
547; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[TMP12]], align 4
548; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[DST_3:%.*]], i64 [[TMP0]]
549; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 0
550; CHECK-NEXT:    store <4 x float> [[TMP8]], ptr [[TMP14]], align 4
551; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
552; CHECK-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
553; CHECK-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
554; CHECK:       middle.block:
555; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
556; CHECK:       scalar.ph:
557; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi float [ [[TMP3]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB:%.*]] ]
558; CHECK-NEXT:    [[SCALAR_RECUR_INIT4:%.*]] = phi float [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[BB]] ]
559; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
560; CHECK-NEXT:    br label [[LOOP:%.*]]
561; CHECK:       loop:
562; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi float [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
563; CHECK-NEXT:    [[SCALAR_RECUR5:%.*]] = phi float [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
564; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
565; CHECK-NEXT:    [[FOR_1_USE_1:%.*]] = fmul fast float [[SCALAR_RECUR]], 2.000000e+00
566; CHECK-NEXT:    [[FOR_1_USE_C:%.*]] = fmul fast float [[FOR_1_USE_1]], 2.000000e+00
567; CHECK-NEXT:    [[USED_BY_BOTH:%.*]] = fmul fast float [[FOR_1_USE_C]], [[SCALAR_RECUR5]]
568; CHECK-NEXT:    [[FOR_2_NEXT]] = load float, ptr [[FOR_PTR_2]], align 4
569; CHECK-NEXT:    [[FOR_1_USE_3:%.*]] = fadd fast float [[SCALAR_RECUR]], 1.000000e+00
570; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
571; CHECK-NEXT:    [[FOR_1_NEXT]] = load float, ptr [[FOR_PTR_1]], align 4
572; CHECK-NEXT:    [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST_1]], i64 [[IV]]
573; CHECK-NEXT:    store float [[USED_BY_BOTH]], ptr [[GEP_DST_1]], align 4
574; CHECK-NEXT:    [[GEP_DST_2:%.*]] = getelementptr inbounds float, ptr [[DST_2]], i64 [[IV]]
575; CHECK-NEXT:    store float [[FOR_1_USE_1]], ptr [[GEP_DST_2]], align 4
576; CHECK-NEXT:    [[GEP_DST_3:%.*]] = getelementptr inbounds float, ptr [[DST_3]], i64 [[IV]]
577; CHECK-NEXT:    store float [[FOR_1_USE_3]], ptr [[GEP_DST_3]], align 4
578; CHECK-NEXT:    [[EC:%.*]] = icmp slt i64 [[IV]], 1000
579; CHECK-NEXT:    br i1 [[EC]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP13:![0-9]+]]
580; CHECK:       exit:
581; CHECK-NEXT:    ret void
582;
583bb:
584  br label %loop
585
586loop:
587  %for.1 = phi float [ 0.0, %bb ], [ %for.1.next, %loop]
588  %for.2 = phi float [ 0.0, %bb ], [ %for.2.next, %loop]
589  %iv = phi i64 [ 0, %bb ], [ %iv.next, %loop ]
590  %for.1.use.1  = fmul fast float %for.1, 2.0
591  %for.1.use.c  = fmul fast float %for.1.use.1, 2.0
592  %used.by.both = fmul fast float %for.1.use.c, %for.2
593  %for.2.next = load float, ptr %for.ptr.2, align 4
594  %for.1.use.3 = fadd fast float %for.1, 1.0
595  %iv.next = add nuw nsw i64 %iv, 1
596  %for.1.next = load float, ptr %for.ptr.1, align 4
597  %gep.dst.1 = getelementptr inbounds float, ptr %dst.1, i64 %iv
598  store float %used.by.both, ptr %gep.dst.1
599  %gep.dst.2 = getelementptr inbounds float, ptr %dst.2, i64 %iv
600  store float %for.1.use.1, ptr %gep.dst.2
601  %gep.dst.3 = getelementptr inbounds float, ptr %dst.3, i64 %iv
602  store float %for.1.use.3, ptr %gep.dst.3
603  %ec = icmp slt i64 %iv, 1000
604  br i1 %ec, label %loop, label %exit
605
606exit:
607  ret void
608}
609
610; The (first) reason `%first_time.1` cannot be sunk is because it appears outside
611; the header and is not dominated by Previous. The fact that it feeds Previous
612; is a second sinking-preventing reason.
613define void @cannot_sink_phi(ptr %ptr) {
614; CHECK-LABEL: @cannot_sink_phi(
615; CHECK-NEXT:  entry:
616; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
617; CHECK:       loop.header:
618; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
619; CHECK-NEXT:    [[FOR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_NEXT:%.*]], [[LOOP_LATCH]] ]
620; CHECK-NEXT:    [[C_1:%.*]] = icmp ult i64 [[IV]], 500
621; CHECK-NEXT:    br i1 [[C_1]], label [[IF_TRUEBB:%.*]], label [[IF_FALSEBB:%.*]]
622; CHECK:       if.truebb:
623; CHECK-NEXT:    br label [[LOOP_LATCH]]
624; CHECK:       if.falsebb:
625; CHECK-NEXT:    br label [[LOOP_LATCH]]
626; CHECK:       loop.latch:
627; CHECK-NEXT:    [[FIRST_TIME_1:%.*]] = phi i32 [ 20, [[IF_TRUEBB]] ], [ [[FOR]], [[IF_FALSEBB]] ]
628; CHECK-NEXT:    [[C_2:%.*]] = icmp ult i64 [[IV]], 800
629; CHECK-NEXT:    [[FOR_NEXT]] = select i1 [[C_2]], i32 30, i32 [[FIRST_TIME_1]]
630; CHECK-NEXT:    [[PTR_IDX:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[IV]]
631; CHECK-NEXT:    store i32 [[FOR_NEXT]], ptr [[PTR_IDX]], align 4
632; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
633; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
634; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_HEADER]]
635; CHECK:       exit:
636; CHECK-NEXT:    ret void
637;
638entry:
639  br label %loop.header
640
641loop.header:
642  %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.latch ]
643  %for = phi i32 [ 0, %entry ], [ %for.next, %loop.latch ]
644  %c.1 = icmp ult i64 %iv, 500
645  br i1 %c.1, label %if.truebb, label %if.falsebb
646
647if.truebb:
648  br label %loop.latch
649
650if.falsebb:
651  br label %loop.latch
652
653loop.latch:
654  %first_time.1 = phi i32 [ 20, %if.truebb ], [ %for, %if.falsebb ]
655  %c.2 = icmp ult i64 %iv, 800
656  %for.next = select i1 %c.2, i32 30, i32 %first_time.1
657  %ptr.idx = getelementptr i32, ptr %ptr, i64 %iv
658  store i32 %for.next, ptr %ptr.idx
659  %iv.next = add nuw nsw i64 %iv, 1
660  %exitcond.not = icmp eq i64 %iv.next, 1000
661  br i1 %exitcond.not, label %exit, label %loop.header
662
663exit:
664  ret void
665}
666
667; A recurrence in a multiple exit loop.
668define i16 @multiple_exit(ptr %p, i32 %n) {
669; CHECK-LABEL: @multiple_exit(
670; CHECK-NEXT:  entry:
671; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
672; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
673; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
674; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4
675; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
676; CHECK:       vector.ph:
677; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
678; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
679; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 4, i32 [[N_MOD_VF]]
680; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
681; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
682; CHECK:       vector.body:
683; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
684; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
685; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
686; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
687; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
688; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
689; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP6]], align 2
690; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
691; CHECK-NEXT:    store <4 x i16> [[TMP7]], ptr [[TMP6]], align 4
692; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
693; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
694; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
695; CHECK:       middle.block:
696; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
697; CHECK-NEXT:    br label [[SCALAR_PH]]
698; CHECK:       scalar.ph:
699; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
700; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
701; CHECK-NEXT:    br label [[FOR_COND:%.*]]
702; CHECK:       for.cond:
703; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
704; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[FOR_BODY]] ]
705; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
706; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
707; CHECK-NEXT:    [[REC_NEXT]] = load i16, ptr [[B]], align 2
708; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
709; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
710; CHECK:       for.body:
711; CHECK-NEXT:    store i16 [[SCALAR_RECUR]], ptr [[B]], align 4
712; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
713; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
714; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP15:![0-9]+]]
715; CHECK:       if.end:
716; CHECK-NEXT:    [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[SCALAR_RECUR]], [[FOR_COND]] ]
717; CHECK-NEXT:    ret i16 [[REC_LCSSA]]
718;
719entry:
720  br label %for.cond
721
722for.cond:
723  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
724  %rec = phi i16 [0, %entry], [ %rec.next, %for.body ]
725  %iprom = sext i32 %i to i64
726  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
727  %rec.next = load i16, ptr %b
728  %cmp = icmp slt i32 %i, %n
729  br i1 %cmp, label %for.body, label %if.end
730
731for.body:
732  store i16 %rec , ptr %b, align 4
733  %inc = add nsw i32 %i, 1
734  %cmp2 = icmp slt i32 %i, 2096
735  br i1 %cmp2, label %for.cond, label %if.end
736
737if.end:
738  ret i16 %rec
739}
740
741
742; A multiple exit case where one of the exiting edges involves a value
743; from the recurrence and one does not.
744define i16 @multiple_exit2(ptr %p, i32 %n) {
745; CHECK-LABEL: @multiple_exit2(
746; CHECK-NEXT:  entry:
747; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
748; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
749; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
750; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 4
751; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
752; CHECK:       vector.ph:
753; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
754; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
755; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 4, i32 [[N_MOD_VF]]
756; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
757; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
758; CHECK:       vector.body:
759; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
760; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
761; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
762; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
763; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]]
764; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
765; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP6]], align 2
766; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
767; CHECK-NEXT:    store <4 x i16> [[TMP7]], ptr [[TMP6]], align 4
768; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
769; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
770; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
771; CHECK:       middle.block:
772; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
773; CHECK-NEXT:    br label [[SCALAR_PH]]
774; CHECK:       scalar.ph:
775; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
776; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
777; CHECK-NEXT:    br label [[FOR_COND:%.*]]
778; CHECK:       for.cond:
779; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
780; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[FOR_BODY]] ]
781; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
782; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 [[IPROM]]
783; CHECK-NEXT:    [[REC_NEXT]] = load i16, ptr [[B]], align 2
784; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
785; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
786; CHECK:       for.body:
787; CHECK-NEXT:    store i16 [[SCALAR_RECUR]], ptr [[B]], align 4
788; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
789; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
790; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP17:![0-9]+]]
791; CHECK:       if.end:
792; CHECK-NEXT:    [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[FOR_COND]] ], [ 10, [[FOR_BODY]] ]
793; CHECK-NEXT:    ret i16 [[REC_LCSSA]]
794;
795entry:
796  br label %for.cond
797
798for.cond:
799  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
800  %rec = phi i16 [0, %entry], [ %rec.next, %for.body ]
801  %iprom = sext i32 %i to i64
802  %b = getelementptr inbounds i16, ptr %p, i64 %iprom
803  %rec.next = load i16, ptr %b
804  %cmp = icmp slt i32 %i, %n
805  br i1 %cmp, label %for.body, label %if.end
806
807for.body:
808  store i16 %rec , ptr %b, align 4
809  %inc = add nsw i32 %i, 1
810  %cmp2 = icmp slt i32 %i, 2096
811  br i1 %cmp2, label %for.cond, label %if.end
812
813if.end:
814  %rec.lcssa = phi i16 [ %rec, %for.cond ], [ 10, %for.body ]
815  ret i16 %rec.lcssa
816}
817
818; A test where the instructions to sink may not be visited in dominance order.
819define void @sink_dominance(ptr %ptr, i32 %N) {
820; CHECK-LABEL: @sink_dominance(
821; CHECK-NEXT:  entry:
822; CHECK-NEXT:    [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
823; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4
824; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
825; CHECK:       vector.scevcheck:
826; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
827; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
828; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
829; CHECK-NEXT:    br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
830; CHECK:       vector.ph:
831; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
832; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
833; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
834; CHECK:       vector.body:
835; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
836; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
837; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
838; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP2]]
839; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
840; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
841; CHECK-NEXT:    [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
842; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
843; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
844; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213)
845; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP7]], <4 x i32> splat (i32 22)
846; CHECK-NEXT:    store <4 x i32> [[TMP9]], ptr [[TMP4]], align 4
847; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
848; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
849; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
850; CHECK:       middle.block:
851; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
852; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
853; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
854; CHECK:       scalar.ph:
855; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
856; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
857; CHECK-NEXT:    br label [[LOOP:%.*]]
858; CHECK:       loop:
859; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
860; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
861; CHECK-NEXT:    [[FOR_TRUNC:%.*]] = trunc i64 [[SCALAR_RECUR]] to i32
862; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[FOR_TRUNC]], 213
863; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], i32 [[FOR_TRUNC]], i32 22
864; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[IV]]
865; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[GEP]], align 4
866; CHECK-NEXT:    [[FOR_NEXT]] = zext i32 [[LV]] to i64
867; CHECK-NEXT:    store i32 [[SELECT]], ptr [[GEP]], align 4
868; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
869; CHECK-NEXT:    [[CMP73:%.*]] = icmp ugt i32 [[N]], [[IV_NEXT]]
870; CHECK-NEXT:    br i1 [[CMP73]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP19:![0-9]+]]
871; CHECK:       exit:
872; CHECK-NEXT:    ret void
873;
874entry:
875  br label %loop
876
877loop:
878  %for = phi i64 [ 0, %entry ], [ %for.next, %loop ]
879  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
880
881  %for.trunc = trunc i64 %for to i32
882  %cmp = icmp slt i32 %for.trunc, 213
883  %select = select i1 %cmp, i32 %for.trunc, i32 22
884
885  %gep = getelementptr inbounds i32, ptr %ptr, i32 %iv
886  %lv = load i32, ptr %gep, align 4
887  %for.next = zext i32 %lv to i64
888  store i32 %select, ptr %gep
889
890  %iv.next = add i32 %iv, 1
891  %cmp73 = icmp ugt i32 %N, %iv.next
892  br i1 %cmp73, label %loop, label %exit
893
894exit:
895  ret void
896}
897
898; Similar to @sink_dominance, but with 2 separate chains that merge at %select
899; with a different number of instructions in between.
900define void @sink_dominance_2(ptr %ptr, i32 %N) {
901; CHECK-LABEL: @sink_dominance_2(
902; CHECK-NEXT:  entry:
903; CHECK-NEXT:    [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
904; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK:       vector.scevcheck:
; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1)
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 4
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP2]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; CHECK-NEXT:    [[TMP5]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP7]], splat (i32 2)
; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i32> [[TMP8]], splat (i32 99)
; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt <4 x i32> [[TMP7]], splat (i32 213)
; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP7]], <4 x i32> [[TMP9]]
; CHECK-NEXT:    store <4 x i32> [[TMP11]], ptr [[TMP4]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[FOR_TRUNC:%.*]] = trunc i64 [[SCALAR_RECUR]] to i32
; CHECK-NEXT:    [[STEP:%.*]] = add i32 [[FOR_TRUNC]], 2
; CHECK-NEXT:    [[STEP_2:%.*]] = mul i32 [[STEP]], 99
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[FOR_TRUNC]], 213
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], i32 [[FOR_TRUNC]], i32 [[STEP_2]]
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[IV]]
; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT:    [[FOR_NEXT]] = zext i32 [[LV]] to i64
; CHECK-NEXT:    store i32 [[SELECT]], ptr [[GEP]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT:    [[CMP73:%.*]] = icmp ugt i32 [[N]], [[IV_NEXT]]
; CHECK-NEXT:    br i1 [[CMP73]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %for = phi i64 [ 0, %entry ], [ %for.next, %loop ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]

  %for.trunc = trunc i64 %for to i32
  %step = add i32 %for.trunc, 2
  %step.2 = mul i32 %step, 99

  %cmp = icmp slt i32 %for.trunc, 213
  %select = select i1 %cmp, i32 %for.trunc, i32 %step.2

  %gep = getelementptr inbounds i32, ptr %ptr, i32 %iv
  %lv = load i32, ptr %gep, align 4
  %for.next = zext i32 %lv to i64
  store i32 %select, ptr %gep

  %iv.next = add i32 %iv, 1
  %cmp73 = icmp ugt i32 %N, %iv.next
  br i1 %cmp73, label %loop, label %exit

exit:
  ret void
}

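; Negative test: to vectorize, the load %lv.for (a user of the recurrence %for)
; would have to be sunk past the store to %ptr, which may alias it, so the loop
; is not vectorized.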
define void @cannot_sink_load_past_store(ptr %ptr, i32 %N) {
; CHECK-LABEL: @cannot_sink_load_past_store(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[FOR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[FOR_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[GEP_FOR:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[FOR]]
; CHECK-NEXT:    [[LV_FOR:%.*]] = load i32, ptr [[GEP_FOR]], align 4
; CHECK-NEXT:    [[FOR_TRUNC:%.*]] = trunc i64 [[FOR]] to i32
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[LV_FOR]], [[FOR_TRUNC]]
; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], i32 [[LV_FOR]], i32 22
; CHECK-NEXT:    [[GEP_IV:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[IV]]
; CHECK-NEXT:    store i32 0, ptr [[GEP_IV]], align 4
; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT:    [[FOR_NEXT]] = zext i32 [[IV]] to i64
; CHECK-NEXT:    [[CMP73:%.*]] = icmp ugt i32 [[N:%.*]], [[IV_NEXT]]
; CHECK-NEXT:    br i1 [[CMP73]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %for = phi i64 [ 0, %entry ], [ %for.next, %loop ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]

  %gep.for = getelementptr inbounds i32, ptr %ptr, i64 %for
  %lv.for = load i32, ptr %gep.for, align 4
  %for.trunc = trunc i64 %for to i32
  %cmp = icmp slt i32 %lv.for, %for.trunc
  %select = select i1 %cmp, i32 %lv.for, i32 22

  %gep.iv = getelementptr inbounds i32, ptr %ptr, i32 %iv
  store i32 0, ptr %gep.iv
  %iv.next = add i32 %iv, 1
  %for.next = zext i32 %iv to i64

  %cmp73 = icmp ugt i32 %N, %iv.next
  br i1 %cmp73, label %loop, label %exit

exit:
  ret void
}

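; Two first-order recurrences (%for.1 and %for.2) share the same incoming value
; %for.1.next; check that their users are sunk correctly and the loop is
; vectorized.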
define void @test_for_sink_instruction_after_same_incoming_1(ptr %ptr) {
; CHECK-LABEL: @test_for_sink_instruction_after_same_incoming_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 1.000000e+01>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 2.000000e+01>, [[VECTOR_PH]] ], [ [[WIDE_LOAD]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x double> [[TMP4]], [[TMP5]]
; CHECK-NEXT:    store <4 x double> [[TMP6]], ptr [[TMP2]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[SCALAR_RECUR4:%.*]] = phi double [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT]], [[LOOP]] ]
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ADD_1:%.*]] = fadd double 1.000000e+01, [[SCALAR_RECUR4]]
; CHECK-NEXT:    [[ADD_2:%.*]] = fadd double [[ADD_1]], [[SCALAR_RECUR]]
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_PTR:%.*]] = getelementptr inbounds double, ptr [[PTR]], i64 [[IV]]
; CHECK-NEXT:    [[FOR_1_NEXT]] = load double, ptr [[GEP_PTR]], align 8
; CHECK-NEXT:    store double [[ADD_2]], ptr [[GEP_PTR]], align 8
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %for.1 = phi double [ 10.0, %entry ], [ %for.1.next, %loop ]
  %for.2 = phi double [ 20.0, %entry ], [ %for.1.next, %loop ]
  %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
  %add.1 = fadd double 10.0, %for.2
  %add.2 = fadd double %add.1, %for.1
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.ptr = getelementptr inbounds double, ptr %ptr, i64 %iv
  %for.1.next  = load double, ptr %gep.ptr, align 8
  store double %add.2, ptr %gep.ptr
  %exitcond.not = icmp eq i64 %iv.next, 1000
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}


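; Same as @test_for_sink_instruction_after_same_incoming_1, but with the order
; of the recurrence phis swapped.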
define void @test_for_sink_instruction_after_same_incoming_2(ptr %ptr) {
; CHECK-LABEL: @test_for_sink_instruction_after_same_incoming_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 2.000000e+01>, [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 1.000000e+01>, [[VECTOR_PH]] ], [ [[WIDE_LOAD]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x double> splat (double 1.000000e+01), [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x double> [[TMP5]], [[TMP3]]
; CHECK-NEXT:    store <4 x double> [[TMP6]], ptr [[TMP2]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 2.000000e+01, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi double [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ], [ 1.000000e+01, [[ENTRY]] ]
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 997, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
; CHECK-NEXT:    br label [[LOOP:%.*]]
; CHECK:       loop:
; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[SCALAR_RECUR4:%.*]] = phi double [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT]], [[LOOP]] ]
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT:    [[ADD_1:%.*]] = fadd double 1.000000e+01, [[SCALAR_RECUR]]
; CHECK-NEXT:    [[ADD_2:%.*]] = fadd double [[ADD_1]], [[SCALAR_RECUR4]]
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[GEP_PTR:%.*]] = getelementptr inbounds double, ptr [[PTR]], i64 [[IV]]
; CHECK-NEXT:    [[FOR_1_NEXT]] = load double, ptr [[GEP_PTR]], align 8
; CHECK-NEXT:    store double [[ADD_2]], ptr [[GEP_PTR]], align 8
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %for.2 = phi double [ 20.0, %entry ], [ %for.1.next, %loop ]
  %for.1 = phi double [ 10.0, %entry ], [ %for.1.next, %loop ]
  %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
  %add.1 = fadd double 10.0, %for.2
  %add.2 = fadd double %add.1, %for.1
  %iv.next = add nuw nsw i64 %iv, 1
  %gep.ptr = getelementptr inbounds double, ptr %ptr, i64 %iv
  %for.1.next  = load double, ptr %gep.ptr, align 8
  store double %add.2, ptr %gep.ptr
  %exitcond.not = icmp eq i64 %iv.next, 1000
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
