xref: /llvm-project/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes=loop-vectorize -force-vector-width=2 -S %s | FileCheck %s
3
4@src = external global [32 x i16], align 1
5@dst = external global [32 x i16], align 1
6
7; The load in the loop does not need predication, because the accessed memory
8; is de-referenceable for all loop iterations.
; %blend in loop.cond is a phi with a single incoming value, and loop.cond is
; reached unconditionally from loop.header. The CHECK lines below expect a
; single unmasked <2 x i16> wide load from @src and one select for %res.
; The %b argument is not used by the function body.
9define void @single_incoming_phi_no_blend_mask(i64 %a, i64 %b) {
10; CHECK-LABEL: @single_incoming_phi_no_blend_mask(
11; CHECK-NEXT:  entry:
12; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
13; CHECK:       vector.ph:
14; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
15; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
16; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
17; CHECK:       vector.body:
18; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
19; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
21; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDEX]] to i16
22; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[TMP1]], 0
23; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP2]]
24; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 0
25; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP4]], align 1
26; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
27; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i16> splat (i16 1), <2 x i16> [[WIDE_LOAD]]
28; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]]
29; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 0
30; CHECK-NEXT:    store <2 x i16> [[PREDPHI]], ptr [[TMP7]], align 2
31; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
32; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
33; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
34; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
35; CHECK:       middle.block:
36; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
37; CHECK:       scalar.ph:
38; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
39; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
40; CHECK:       loop.header:
41; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
42; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
43; CHECK-NEXT:    br label [[LOOP_COND:%.*]]
44; CHECK:       loop.cond:
45; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
46; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]]
47; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1
48; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
49; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
50; CHECK:       loop.next:
51; CHECK-NEXT:    br label [[LOOP_LATCH]]
52; CHECK:       loop.latch:
53; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
54; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]]
55; CHECK-NEXT:    store i16 [[RES]], ptr [[DST_PTR]], align 2
56; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
57; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
58; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
59; CHECK:       exit:
60; CHECK-NEXT:    ret void
61;
62entry:
63  br label %loop.header
64
65loop.header:
66  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
67  %iv.trunc = trunc i64 %iv to i16
68  br label %loop.cond
69
70loop.cond:
  ; Single-incoming phi: loop.header is the only predecessor of this block.
71  %blend = phi i16 [ %iv.trunc, %loop.header ]
72  %src.ptr = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 %blend
73  %lv = load i16, ptr %src.ptr, align 1
74  %cmp.b = icmp sgt i64 %iv, %a
75  br i1 %cmp.b, label %loop.next, label %loop.latch
76
77loop.next:
78  br label %loop.latch
79
80loop.latch:
  ; %res is the loaded value unless %cmp.b was true, in which case it is 1.
81  %res = phi i16 [ %lv, %loop.cond ], [ 1, %loop.next ]
82  %dst.ptr = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 %iv
83  store i16 %res, ptr %dst.ptr
84  %iv.next = add nuw nsw i64 %iv, 1
  ; The exit test uses the pre-increment %iv, so the body runs for %iv = 0..31.
85  %cmp439 = icmp ult i64 %iv, 31
86  br i1 %cmp439, label %loop.header, label %exit
87
88exit:
89  ret void
90}
91
92; The load in the loop does not need predication, because the accessed memory
93; is de-referenceable for all loop iterations.
; Unlike @single_incoming_phi_no_blend_mask, loop.cond (which holds the
; single-incoming phi %blend and the load) is only entered when %cmp.a is true.
; The CHECK lines below still expect one unmasked <2 x i16> wide load, with the
; vector-body GEPs into @src emitted without the inbounds flag, followed by two
; selects that rebuild %res. The %b argument is not used by the function body.
94define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
95; CHECK-LABEL: @single_incoming_phi_with_blend_mask(
96; CHECK-NEXT:  entry:
97; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
98; CHECK:       vector.ph:
99; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
100; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
101; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
102; CHECK:       vector.body:
103; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
104; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
105; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
106; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDEX]] to i16
107; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[TMP1]], 0
108; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
109; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [32 x i16], ptr @src, i16 0, i16 [[TMP2]]
110; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
111; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP5]], align 1
112; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
113; CHECK-NEXT:    [[TMP7:%.*]] = select <2 x i1> [[TMP3]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer
114; CHECK-NEXT:    [[TMP8:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
115; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP8]], <2 x i16> zeroinitializer, <2 x i16> [[WIDE_LOAD]]
116; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <2 x i1> [[TMP7]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]]
117; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]]
118; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP9]], i32 0
119; CHECK-NEXT:    store <2 x i16> [[PREDPHI1]], ptr [[TMP10]], align 2
120; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
121; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
122; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
123; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
124; CHECK:       middle.block:
125; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
126; CHECK:       scalar.ph:
127; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
128; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
129; CHECK:       loop.header:
130; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
131; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
132; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
133; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]]
134; CHECK:       loop.cond:
135; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
136; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]]
137; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1
138; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
139; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
140; CHECK:       loop.next:
141; CHECK-NEXT:    br label [[LOOP_LATCH]]
142; CHECK:       loop.latch:
143; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
144; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]]
145; CHECK-NEXT:    store i16 [[RES]], ptr [[DST_PTR]], align 2
146; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
147; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
148; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
149; CHECK:       exit:
150; CHECK-NEXT:    ret void
151;
152entry:
153  br label %loop.header
154
155loop.header:
156  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
157  %iv.trunc = trunc i64 %iv to i16
  ; loop.cond, and therefore the load from @src, only runs when %iv >u %a.
158  %cmp.a = icmp ugt i64 %iv, %a
159  br i1 %cmp.a, label %loop.cond, label %loop.latch
160
161loop.cond:
  ; Single-incoming phi: loop.header is the only predecessor of this block.
162  %blend = phi i16 [ %iv.trunc, %loop.header ]
163  %src.ptr = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 %blend
164  %lv = load i16, ptr %src.ptr, align 1
165  %cmp.b = icmp sgt i64 %iv, %a
166  br i1 %cmp.b, label %loop.next, label %loop.latch
167
168loop.next:
169  br label %loop.latch
170
171loop.latch:
  ; %res is 0 when loop.cond was skipped, 1 when %cmp.b was also true, and the
  ; loaded value otherwise.
172  %res = phi i16 [ 0, %loop.header ], [ %lv, %loop.cond ], [ 1, %loop.next ]
173  %dst.ptr = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 %iv
174  store i16 %res, ptr %dst.ptr
175  %iv.next = add nuw nsw i64 %iv, 1
  ; The exit test uses the pre-increment %iv, so the body runs for %iv = 0..31.
176  %cmp439 = icmp ult i64 %iv, 31
177  br i1 %cmp439, label %loop.header, label %exit
178
179exit:
180  ret void
181}
182
; %blend in loop.latch has two incoming values, %iv.trunc and %iv.trunc.2, which
; are separate but identical truncations of %iv; which one is used depends on
; %cmp.a. The CHECK lines below expect the phi to become a select between two
; <2 x i16> induction vectors, and the load from @src to be emitted as one
; scalar load per lane (extractelement + load + insertelement).
183define void @multiple_incoming_phi_with_blend_mask(i64 %a, ptr noalias %dst) {
184; CHECK-LABEL: @multiple_incoming_phi_with_blend_mask(
185; CHECK-NEXT:  entry:
186; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
187; CHECK:       vector.ph:
188; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
189; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
190; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
191; CHECK:       vector.body:
192; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
193; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
194; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
195; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VECTOR_BODY]] ]
196; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
197; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
198; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i16> [[VEC_IND3]], <2 x i16> [[VEC_IND1]]
199; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 0
200; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP2]]
201; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1
202; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP4]]
203; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP3]], align 1
204; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[TMP5]], align 1
205; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0
206; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP7]], i32 1
207; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[TMP0]]
208; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i32 0
209; CHECK-NEXT:    store <2 x i16> [[TMP9]], ptr [[TMP11]], align 2
210; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
211; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
212; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], splat (i16 2)
213; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <2 x i16> [[VEC_IND3]], splat (i16 2)
214; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32
215; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
216; CHECK:       middle.block:
217; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
218; CHECK:       scalar.ph:
219; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
220; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
221; CHECK:       loop.header:
222; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
223; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
224; CHECK-NEXT:    [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16
225; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
226; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
227; CHECK:       loop.next:
228; CHECK-NEXT:    br label [[LOOP_LATCH]]
229; CHECK:       loop.latch:
230; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ], [ [[IV_TRUNC_2]], [[LOOP_NEXT]] ]
231; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]]
232; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1
233; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
234; CHECK-NEXT:    store i16 [[LV]], ptr [[DST_PTR]], align 2
235; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
236; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
237; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
238; CHECK:       exit:
239; CHECK-NEXT:    ret void
240;
241entry:
242  br label %loop.header
243
244loop.header:
245  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  ; Two separate truncations of the same %iv, so the phi in loop.latch has two
  ; distinct incoming values.
246  %iv.trunc = trunc i64 %iv to i16
247  %iv.trunc.2 = trunc i64 %iv to i16
248  %cmp.a = icmp ugt i64 %iv, %a
249  br i1 %cmp.a, label %loop.next, label %loop.latch
250
251loop.next:
252  br label %loop.latch
253
254loop.latch:
  ; %iv.trunc.2 arrives via loop.next (%cmp.a true), %iv.trunc directly from
  ; loop.header (%cmp.a false).
255  %blend = phi i16 [ %iv.trunc, %loop.header ], [ %iv.trunc.2, %loop.next ]
256  %src.ptr = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 %blend
257  %lv = load i16, ptr %src.ptr, align 1
258  %dst.ptr = getelementptr inbounds i16, ptr %dst, i64 %iv
259  store i16 %lv, ptr %dst.ptr
260  %iv.next = add nuw nsw i64 %iv, 1
  ; The exit test uses the pre-increment %iv, so the body runs for %iv = 0..31.
261  %cmp439 = icmp ult i64 %iv, 31
262  br i1 %cmp439, label %loop.header, label %exit
263
264exit:
265  ret void
266}
267
268; The load in the loop needs predication, because the accessed memory is not
269; de-referenceable for all iterations of the loop.
; The loop body runs for %iv = 0..63, while @src is declared as [32 x i16].
; The CHECK lines below expect the load to be emitted per lane inside
; pred.load.if / pred.load.continue blocks guarded by the %cmp.a mask.
; The %b argument is not used by the function body.
270define void @single_incoming_needs_predication(i64 %a, i64 %b) {
271; CHECK-LABEL: @single_incoming_needs_predication(
272; CHECK-NEXT:  entry:
273; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
274; CHECK:       vector.ph:
275; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A:%.*]], i64 0
276; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
277; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
278; CHECK:       vector.body:
279; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
280; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ]
281; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
282; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDEX]] to i16
283; CHECK-NEXT:    [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
284; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
285; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
286; CHECK:       pred.load.if:
287; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[TMP1]], 0
288; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP4]]
289; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 1
290; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0
291; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
292; CHECK:       pred.load.continue:
293; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
294; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1
295; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]]
296; CHECK:       pred.load.if1:
297; CHECK-NEXT:    [[TMP10:%.*]] = add i16 [[TMP1]], 1
298; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[TMP10]]
299; CHECK-NEXT:    [[TMP12:%.*]] = load i16, ptr [[TMP11]], align 1
300; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP12]], i32 1
301; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
302; CHECK:       pred.load.continue2:
303; CHECK-NEXT:    [[TMP14:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ]
304; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt <2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
305; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP2]], <2 x i1> [[TMP15]], <2 x i1> zeroinitializer
306; CHECK-NEXT:    [[TMP17:%.*]] = xor <2 x i1> [[TMP2]], splat (i1 true)
307; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP17]], <2 x i16> zeroinitializer, <2 x i16> [[TMP14]]
308; CHECK-NEXT:    [[PREDPHI3:%.*]] = select <2 x i1> [[TMP16]], <2 x i16> splat (i16 1), <2 x i16> [[PREDPHI]]
309; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP0]]
310; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP18]], i32 0
311; CHECK-NEXT:    store <2 x i16> [[PREDPHI3]], ptr [[TMP19]], align 2
312; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
313; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
314; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
315; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
316; CHECK:       middle.block:
317; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
318; CHECK:       scalar.ph:
319; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
320; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
321; CHECK:       loop.header:
322; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
323; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
324; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
325; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]]
326; CHECK:       loop.cond:
327; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
328; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 [[BLEND]]
329; CHECK-NEXT:    [[LV:%.*]] = load i16, ptr [[SRC_PTR]], align 1
330; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
331; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
332; CHECK:       loop.next:
333; CHECK-NEXT:    br label [[LOOP_LATCH]]
334; CHECK:       loop.latch:
335; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
336; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[IV]]
337; CHECK-NEXT:    store i16 [[RES]], ptr [[DST_PTR]], align 2
338; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
339; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 63
340; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP9:![0-9]+]]
341; CHECK:       exit:
342; CHECK-NEXT:    ret void
343;
344entry:
345  br label %loop.header
346
347loop.header:
348  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
349  %iv.trunc = trunc i64 %iv to i16
  ; loop.cond, and therefore the load from @src, only runs when %iv >u %a.
350  %cmp.a = icmp ugt i64 %iv, %a
351  br i1 %cmp.a, label %loop.cond, label %loop.latch
352
353loop.cond:
  ; Single-incoming phi: loop.header is the only predecessor of this block.
354  %blend = phi i16 [ %iv.trunc, %loop.header ]
355  %src.ptr = getelementptr inbounds [32 x i16], ptr @src, i16 0, i16 %blend
356  %lv = load i16, ptr %src.ptr, align 1
357  %cmp.b = icmp sgt i64 %iv, %a
358  br i1 %cmp.b, label %loop.next, label %loop.latch
359
360loop.next:
361  br label %loop.latch
362
363loop.latch:
  ; %res is 0 when loop.cond was skipped, 1 when %cmp.b was also true, and the
  ; loaded value otherwise.
364  %res = phi i16 [ 0, %loop.header ], [ %lv, %loop.cond ], [ 1, %loop.next ]
365  %dst.ptr = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 %iv
366  store i16 %res, ptr %dst.ptr
367  %iv.next = add nuw nsw i64 %iv, 1
  ; The exit test uses the pre-increment %iv, so the body runs for %iv = 0..63,
  ; which is twice the 32 elements declared for @src.
368  %cmp439 = icmp ult i64 %iv, 63
369  br i1 %cmp439, label %loop.header, label %exit
370
371exit:
372  ret void
373}
374
375; Test case for PR44800.
; The conditional branch in loop.header names loop.latch as both successors,
; so the phi %p lists loop.header twice with the same value %iv. The CHECK
; lines below expect no select in the vector body: the <2 x i32> induction
; vector is stored directly.
376define void @duplicated_incoming_blocks_blend(i32 %x, ptr %ptr) {
377; CHECK-LABEL: @duplicated_incoming_blocks_blend(
378; CHECK-NEXT:  entry:
379; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
380; CHECK:       vector.ph:
381; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
382; CHECK:       vector.body:
383; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
384; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
385; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
386; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i32 [[TMP0]]
387; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0
388; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4
389; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
390; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
391; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
392; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
393; CHECK:       middle.block:
394; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
395; CHECK:       scalar.ph:
396; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
397; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
398; CHECK:       loop.header:
399; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_I:%.*]], [[LOOP_LATCH:%.*]] ]
400; CHECK-NEXT:    [[C_0:%.*]] = icmp ugt i32 [[IV]], [[X:%.*]]
401; CHECK-NEXT:    br i1 [[C_0]], label [[LOOP_LATCH]], label [[LOOP_LATCH]]
402; CHECK:       loop.latch:
403; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ [[IV]], [[LOOP_HEADER]] ]
404; CHECK-NEXT:    [[GEP_PTR:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[P]]
405; CHECK-NEXT:    store i32 [[P]], ptr [[GEP_PTR]], align 4
406; CHECK-NEXT:    [[ADD_I]] = add nsw i32 [[P]], 1
407; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000
408; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP11:![0-9]+]]
409; CHECK:       exit:
410; CHECK-NEXT:    ret void
411;
412entry:
413  br label %loop.header
414
415loop.header:
416  %iv = phi i32 [ 0 , %entry ], [ %add.i, %loop.latch ]
417  %c.0 = icmp ugt i32 %iv, %x
  ; Both successors are loop.latch, so %c.0 does not change where control goes.
418  br i1 %c.0, label %loop.latch, label %loop.latch
419
420loop.latch:
  ; Two entries for the same predecessor block, both carrying %iv.
421  %p = phi i32 [ %iv, %loop.header ], [ %iv, %loop.header ]
422  %gep.ptr = getelementptr i32, ptr %ptr, i32 %p
423  store i32 %p, ptr %gep.ptr
  ; The induction update and the exit test are computed from %p, not from %iv.
424  %add.i = add nsw i32 %p, 1
425  %cmp = icmp slt i32 %add.i, 1000
426  br i1 %cmp, label %loop.header, label %exit
427
428exit:
429  ret void
430}
431