xref: /llvm-project/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll (revision ed253ef77248d91a15b3a1aa36c0b74bed8ec8af)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -dce -instcombine -S | FileCheck %s
3
4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5
; Single add reduction: the scalar loop accumulates A[0] .. A[256] into an i32
; sum that starts at 0 (257 iterations).
; The RUN line forces a vector width of 4, in-loop reductions and a predicated
; vector body. The assertions below expect:
;   - a lane mask computed as `icmp ult <wide induction>, 257`,
;   - one conditional load block per lane, guarded by that lane's mask bit,
;   - masked-off lanes replaced by 0 through a select,
;   - one llvm.vector.reduce.add per vector iteration, added to a scalar i32 phi,
;   - the vector loop exiting once the index reaches 260.
6define i32 @reduction_sum_single(i32* noalias nocapture %A) {
7; CHECK-LABEL: @reduction_sum_single(
8; CHECK-NEXT:  entry:
9; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
10; CHECK:       vector.ph:
11; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
12; CHECK:       vector.body:
13; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
14; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
15; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
16; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
17; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
18; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
19; CHECK:       pred.load.if:
20; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
21; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
22; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
23; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
24; CHECK:       pred.load.continue:
25; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
26; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
27; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
28; CHECK:       pred.load.if1:
29; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
30; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
31; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
32; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 1
33; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
34; CHECK:       pred.load.continue2:
35; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
36; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
37; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
38; CHECK:       pred.load.if3:
39; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
40; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
41; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
42; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i32 2
43; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
44; CHECK:       pred.load.continue4:
45; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
46; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
47; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
48; CHECK:       pred.load.if5:
49; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[INDEX]], 3
50; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
51; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
52; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i32 3
53; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
54; CHECK:       pred.load.continue6:
55; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
56; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
57; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
58; CHECK-NEXT:    [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]]
59; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
60; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
61; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
62; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
63; CHECK:       middle.block:
64; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
65; CHECK:       scalar.ph:
66; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
67; CHECK:       .lr.ph:
68; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP2:!llvm.loop !.*]]
69; CHECK:       ._crit_edge:
70; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
71; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
72;
73entry:
74  br label %.lr.ph
75
76.lr.ph:                                           ; preds = %entry, %.lr.ph
  ; Loop counter and running sum, both starting at 0.
77  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
78  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
  ; sum += A[i]
79  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
80  %l3 = load i32, i32* %l2, align 4
81  %l7 = add i32 %sum.02, %l3
  ; Advance the counter; leave the loop once its i32 truncation equals 257.
82  %indvars.iv.next = add i64 %indvars.iv, 1
83  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
84  %exitcond = icmp eq i32 %lftr.wideiv, 257
85  br i1 %exitcond, label %._crit_edge, label %.lr.ph
86
87._crit_edge:                                      ; preds = %.lr.ph
  ; Single-entry phi carrying the final sum out of the loop.
88  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
89  ret i32 %sum.0.lcssa
90}
91
; Chained add reduction: each scalar iteration performs
;   sum = ((sum + trunc(i)) + A[i]) + B[i]
; for i = 0 .. 256, with sum starting at 0.
; The assertions below expect a predicated vector body (lane mask
; `icmp ult <wide induction>, 257`) in which the loads of A and of B are each
; expanded into four conditional per-lane load blocks, followed by three
; llvm.vector.reduce.add calls chained through scalar adds: first over the
; i32 induction vector, then over the A values, then over the B values. Each
; reduction input has its masked-off lanes replaced by 0 through a select.
92define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) {
93; CHECK-LABEL: @reduction_sum(
94; CHECK-NEXT:  entry:
95; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
96; CHECK:       vector.ph:
97; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
98; CHECK:       vector.body:
99; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
100; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
101; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[PRED_LOAD_CONTINUE14]] ]
102; CHECK-NEXT:    [[VEC_IND15:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ]
103; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
104; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
105; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
106; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
107; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
108; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
109; CHECK:       pred.load.if:
110; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
111; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
112; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
113; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
114; CHECK:       pred.load.continue:
115; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
116; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
117; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
118; CHECK:       pred.load.if1:
119; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
120; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
121; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
122; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
123; CHECK:       pred.load.continue2:
124; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
125; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
126; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
127; CHECK:       pred.load.if3:
128; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
129; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
130; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
131; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
132; CHECK:       pred.load.continue4:
133; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
134; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
135; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
136; CHECK:       pred.load.if5:
137; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
138; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
139; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
140; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
141; CHECK:       pred.load.continue6:
142; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
143; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
144; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
145; CHECK:       pred.load.if7:
146; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
147; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
148; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
149; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
150; CHECK:       pred.load.continue8:
151; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
152; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
153; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
154; CHECK:       pred.load.if9:
155; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
156; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
157; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
158; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
159; CHECK:       pred.load.continue10:
160; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
161; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
162; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
163; CHECK:       pred.load.if11:
164; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
165; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
166; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
167; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
168; CHECK:       pred.load.continue12:
169; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
170; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
171; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
172; CHECK:       pred.load.if13:
173; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
174; CHECK-NEXT:    [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
175; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
176; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
177; CHECK:       pred.load.continue14:
178; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
179; CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND15]], <4 x i32> zeroinitializer
180; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]])
181; CHECK-NEXT:    [[TMP46:%.*]] = add i32 [[TMP45]], [[VEC_PHI]]
182; CHECK-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
183; CHECK-NEXT:    [[TMP48:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP47]])
184; CHECK-NEXT:    [[TMP49:%.*]] = add i32 [[TMP48]], [[TMP46]]
185; CHECK-NEXT:    [[TMP50:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP43]], <4 x i32> zeroinitializer
186; CHECK-NEXT:    [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP50]])
187; CHECK-NEXT:    [[TMP52]] = add i32 [[TMP51]], [[TMP49]]
188; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
189; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
190; CHECK-NEXT:    [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], <i32 4, i32 4, i32 4, i32 4>
191; CHECK-NEXT:    [[TMP53:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
192; CHECK-NEXT:    br i1 [[TMP53]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
193; CHECK:       middle.block:
194; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
195; CHECK:       scalar.ph:
196; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
197; CHECK:       .lr.ph:
198; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP5:!llvm.loop !.*]]
199; CHECK:       ._crit_edge:
200; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP52]], [[MIDDLE_BLOCK]] ]
201; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
202;
203entry:
204  br label %.lr.ph
205
206.lr.ph:                                           ; preds = %entry, %.lr.ph
  ; Loop counter and running sum, both starting at 0.
207  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
208  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  ; Load A[i] and B[i].
209  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
210  %l3 = load i32, i32* %l2, align 4
211  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
212  %l5 = load i32, i32* %l4, align 4
  ; Add the truncated counter, then A[i], then B[i] to the running sum.
213  %l6 = trunc i64 %indvars.iv to i32
214  %l7 = add i32 %sum.02, %l6
215  %l8 = add i32 %l7, %l3
216  %l9 = add i32 %l8, %l5
  ; Advance the counter; leave the loop once its i32 truncation equals 257.
217  %indvars.iv.next = add i64 %indvars.iv, 1
218  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
219  %exitcond = icmp eq i32 %lftr.wideiv, 257
220  br i1 %exitcond, label %._crit_edge, label %.lr.ph
221
222._crit_edge:                                      ; preds = %.lr.ph
  ; Single-entry phi carrying the final sum out of the loop.
223  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
224  ret i32 %sum.0.lcssa
225}
226
; Add reduction with a constant addend: each scalar iteration performs
;   sum = (sum + A[i]) + 3
; for i = 0 .. 256, with sum starting at 0.
; The assertions below expect a predicated vector body (lane mask
; `icmp ult <wide induction>, 257`) with four conditional per-lane loads of A,
; followed by two chained llvm.vector.reduce.add calls: one over the loaded A
; values and one over a select of the constant vector <3, 3, 3, 3>. In both,
; masked-off lanes are replaced by 0 so they do not contribute to the sum.
227define i32 @reduction_sum_const(i32* noalias nocapture %A) {
228; CHECK-LABEL: @reduction_sum_const(
229; CHECK-NEXT:  entry:
230; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
231; CHECK:       vector.ph:
232; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
233; CHECK:       vector.body:
234; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
235; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
236; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
237; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
238; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
239; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
240; CHECK:       pred.load.if:
241; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
242; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
243; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
244; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
245; CHECK:       pred.load.continue:
246; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
247; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
248; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
249; CHECK:       pred.load.if1:
250; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
251; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
252; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
253; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 1
254; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
255; CHECK:       pred.load.continue2:
256; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
257; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
258; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
259; CHECK:       pred.load.if3:
260; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
261; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
262; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
263; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i32 2
264; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
265; CHECK:       pred.load.continue4:
266; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
267; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
268; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
269; CHECK:       pred.load.if5:
270; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[INDEX]], 3
271; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
272; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
273; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i32 3
274; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
275; CHECK:       pred.load.continue6:
276; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
277; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
278; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
279; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[TMP25]], [[VEC_PHI]]
280; CHECK-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>, <4 x i32> zeroinitializer
281; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]])
282; CHECK-NEXT:    [[TMP29]] = add i32 [[TMP28]], [[TMP26]]
283; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
284; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
285; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
286; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
287; CHECK:       middle.block:
288; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
289; CHECK:       scalar.ph:
290; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
291; CHECK:       .lr.ph:
292; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP7:!llvm.loop !.*]]
293; CHECK:       ._crit_edge:
294; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
295; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
296;
297entry:
298  br label %.lr.ph
299
300.lr.ph:                                           ; preds = %entry, %.lr.ph
  ; Loop counter and running sum, both starting at 0.
301  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
302  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  ; sum += A[i], then sum += 3.
303  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
304  %l3 = load i32, i32* %l2, align 4
305  %l7 = add i32 %sum.02, %l3
306  %l9 = add i32 %l7, 3
  ; Advance the counter; leave the loop once its i32 truncation equals 257.
307  %indvars.iv.next = add i64 %indvars.iv, 1
308  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
309  %exitcond = icmp eq i32 %lftr.wideiv, 257
310  br i1 %exitcond, label %._crit_edge, label %.lr.ph
311
312._crit_edge:                                      ; preds = %.lr.ph
  ; Single-entry phi carrying the final sum out of the loop.
313  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
314  ret i32 %sum.0.lcssa
315}
316
; Chained multiply reduction: each scalar iteration performs
;   prod = ((prod * trunc(i)) * A[i]) * B[i]
; for i = 0 .. 256, with prod starting at 1.
; The assertions below expect the same predicated load structure as the add
; reductions in this file, but with llvm.vector.reduce.mul. The scalar
; accumulator phi starts at 1, and masked-off lanes are replaced by 1 (rather
; than 0) through a select, so they leave the product unchanged.
317define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) {
318; CHECK-LABEL: @reduction_prod(
319; CHECK-NEXT:  entry:
320; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
321; CHECK:       vector.ph:
322; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
323; CHECK:       vector.body:
324; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
325; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
326; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[PRED_LOAD_CONTINUE14]] ]
327; CHECK-NEXT:    [[VEC_IND15:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ]
328; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
329; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
330; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
331; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
332; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
333; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
334; CHECK:       pred.load.if:
335; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
336; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
337; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
338; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
339; CHECK:       pred.load.continue:
340; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
341; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
342; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
343; CHECK:       pred.load.if1:
344; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
345; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
346; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
347; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
348; CHECK:       pred.load.continue2:
349; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
350; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
351; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
352; CHECK:       pred.load.if3:
353; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
354; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
355; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
356; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
357; CHECK:       pred.load.continue4:
358; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
359; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
360; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
361; CHECK:       pred.load.if5:
362; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
363; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
364; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
365; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
366; CHECK:       pred.load.continue6:
367; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
368; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
369; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
370; CHECK:       pred.load.if7:
371; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
372; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
373; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
374; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
375; CHECK:       pred.load.continue8:
376; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
377; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
378; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
379; CHECK:       pred.load.if9:
380; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
381; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
382; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
383; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
384; CHECK:       pred.load.continue10:
385; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
386; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
387; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
388; CHECK:       pred.load.if11:
389; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
390; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
391; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
392; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
393; CHECK:       pred.load.continue12:
394; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
395; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
396; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
397; CHECK:       pred.load.if13:
398; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
399; CHECK-NEXT:    [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
400; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
401; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
402; CHECK:       pred.load.continue14:
403; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
404; CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND15]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
405; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP44]])
406; CHECK-NEXT:    [[TMP46:%.*]] = mul i32 [[TMP45]], [[VEC_PHI]]
407; CHECK-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP23]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
408; CHECK-NEXT:    [[TMP48:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP47]])
409; CHECK-NEXT:    [[TMP49:%.*]] = mul i32 [[TMP48]], [[TMP46]]
410; CHECK-NEXT:    [[TMP50:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP43]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
411; CHECK-NEXT:    [[TMP51:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP50]])
412; CHECK-NEXT:    [[TMP52]] = mul i32 [[TMP51]], [[TMP49]]
413; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
414; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
415; CHECK-NEXT:    [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], <i32 4, i32 4, i32 4, i32 4>
416; CHECK-NEXT:    [[TMP53:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
417; CHECK-NEXT:    br i1 [[TMP53]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
418; CHECK:       middle.block:
419; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
420; CHECK:       scalar.ph:
421; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
422; CHECK:       .lr.ph:
423; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP9:!llvm.loop !.*]]
424; CHECK:       ._crit_edge:
425; CHECK-NEXT:    [[PROD_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP52]], [[MIDDLE_BLOCK]] ]
426; CHECK-NEXT:    ret i32 [[PROD_0_LCSSA]]
427;
428entry:
429  br label %.lr.ph
430
431.lr.ph:                                           ; preds = %entry, %.lr.ph
  ; Loop counter starts at 0; the running product starts at 1.
432  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
433  %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
  ; Load A[i] and B[i].
434  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
435  %l3 = load i32, i32* %l2, align 4
436  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
437  %l5 = load i32, i32* %l4, align 4
  ; Multiply the running product by the truncated counter, then A[i], then B[i].
438  %l6 = trunc i64 %indvars.iv to i32
439  %l7 = mul i32 %prod.02, %l6
440  %l8 = mul i32 %l7, %l3
441  %l9 = mul i32 %l8, %l5
  ; Advance the counter; leave the loop once its i32 truncation equals 257.
442  %indvars.iv.next = add i64 %indvars.iv, 1
443  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
444  %exitcond = icmp eq i32 %lftr.wideiv, 257
445  br i1 %exitcond, label %._crit_edge, label %.lr.ph
446
447._crit_edge:                                      ; preds = %.lr.ph
  ; Single-entry phi carrying the final product out of the loop.
448  %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
449  ret i32 %prod.0.lcssa
450}
451
; reduction_mix: integer add reduction with two different addends per
; iteration: the loop index truncated to i32, and the product A[i] * B[i].
; The accumulator starts at 0 and the scalar loop runs 257 iterations, which is
; not a multiple of the vector width of 4 forced on the RUN line. The expected
; output is therefore a predicated vector loop: lanes are guarded by
; `icmp ult <vector iv>, 257`, every load is emitted as a per-lane conditional
; block, and inactive lanes are replaced by 0 (the identity for add) before
; each llvm.vector.reduce.add call. The two partial sums are chained through
; the scalar accumulator phi, and the vector loop counts up to 260.
452define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B) {
453; CHECK-LABEL: @reduction_mix(
454; CHECK-NEXT:  entry:
455; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
456; CHECK:       vector.ph:
457; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
458; CHECK:       vector.body:
459; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
460; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
461; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[PRED_LOAD_CONTINUE14]] ]
462; CHECK-NEXT:    [[VEC_IND15:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ]
463; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
464; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
465; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
466; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
467; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
468; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
469; CHECK:       pred.load.if:
470; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
471; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
472; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
473; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
474; CHECK:       pred.load.continue:
475; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
476; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
477; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
478; CHECK:       pred.load.if1:
479; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
480; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
481; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
482; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
483; CHECK:       pred.load.continue2:
484; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
485; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
486; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
487; CHECK:       pred.load.if3:
488; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
489; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
490; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
491; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
492; CHECK:       pred.load.continue4:
493; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
494; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
495; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
496; CHECK:       pred.load.if5:
497; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
498; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
499; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
500; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
501; CHECK:       pred.load.continue6:
502; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
503; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
504; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
505; CHECK:       pred.load.if7:
506; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
507; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
508; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
509; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
510; CHECK:       pred.load.continue8:
511; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
512; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
513; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
514; CHECK:       pred.load.if9:
515; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
516; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
517; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
518; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
519; CHECK:       pred.load.continue10:
520; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
521; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
522; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
523; CHECK:       pred.load.if11:
524; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
525; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
526; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
527; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
528; CHECK:       pred.load.continue12:
529; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
530; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
531; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
532; CHECK:       pred.load.if13:
533; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
534; CHECK-NEXT:    [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
535; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
536; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
537; CHECK:       pred.load.continue14:
538; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
539; CHECK-NEXT:    [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP23]]
540; CHECK-NEXT:    [[TMP45:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND15]], <4 x i32> zeroinitializer
541; CHECK-NEXT:    [[TMP46:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP45]])
542; CHECK-NEXT:    [[TMP47:%.*]] = add i32 [[TMP46]], [[VEC_PHI]]
543; CHECK-NEXT:    [[TMP48:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP44]], <4 x i32> zeroinitializer
544; CHECK-NEXT:    [[TMP49:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP48]])
545; CHECK-NEXT:    [[TMP50]] = add i32 [[TMP49]], [[TMP47]]
546; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
547; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
548; CHECK-NEXT:    [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], <i32 4, i32 4, i32 4, i32 4>
549; CHECK-NEXT:    [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
550; CHECK-NEXT:    br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
551; CHECK:       middle.block:
552; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
553; CHECK:       scalar.ph:
554; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
555; CHECK:       .lr.ph:
556; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP11:!llvm.loop !.*]]
557; CHECK:       ._crit_edge:
558; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
559; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
560;
561entry:
562  br label %.lr.ph
563
564.lr.ph:                                           ; preds = %entry, %.lr.ph
565  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
; Running sum; starts at 0 and is updated twice per iteration (%l8, then %l9).
566  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
567  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
568  %l3 = load i32, i32* %l2, align 4
569  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
570  %l5 = load i32, i32* %l4, align 4
571  %l6 = mul nsw i32 %l5, %l3
; The loop index itself, truncated to i32, is the first addend.
572  %l7 = trunc i64 %indvars.iv to i32
573  %l8 = add i32 %sum.02, %l7
; The product B[i] * A[i] is the second addend of the same reduction chain.
574  %l9 = add i32 %l8, %l6
575  %indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the truncated next index equals 257, i.e. after 257 iterations.
576  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
577  %exitcond = icmp eq i32 %lftr.wideiv, 257
578  br i1 %exitcond, label %._crit_edge, label %.lr.ph
579
580._crit_edge:                                      ; preds = %.lr.ph
581  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
582  ret i32 %sum.0.lcssa
583}
584
; reduction_mul: integer multiply reduction. The accumulator starts at 19 and
; is multiplied by A[i] and then by B[i] on each of the 257 iterations.
; The expected output is a predicated vector loop at the vector width of 4
; forced on the RUN line: lanes are guarded by `icmp ult <vector iv>, 257`,
; loads are emitted as per-lane conditional blocks, and inactive lanes are
; replaced by 1 (the identity for mul) before each llvm.vector.reduce.mul call.
; The two partial products are chained through the scalar accumulator phi,
; whose start value is 19, and the vector loop counts up to 260.
585define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B) {
586; CHECK-LABEL: @reduction_mul(
587; CHECK-NEXT:  entry:
588; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
589; CHECK:       vector.ph:
590; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
591; CHECK:       vector.body:
592; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
593; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
594; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE14]] ]
595; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
596; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
597; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
598; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
599; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
600; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
601; CHECK:       pred.load.if:
602; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
603; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
604; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
605; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
606; CHECK:       pred.load.continue:
607; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
608; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
609; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
610; CHECK:       pred.load.if1:
611; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
612; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
613; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
614; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
615; CHECK:       pred.load.continue2:
616; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
617; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
618; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
619; CHECK:       pred.load.if3:
620; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
621; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
622; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
623; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
624; CHECK:       pred.load.continue4:
625; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
626; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
627; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
628; CHECK:       pred.load.if5:
629; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
630; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
631; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
632; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
633; CHECK:       pred.load.continue6:
634; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
635; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
636; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
637; CHECK:       pred.load.if7:
638; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
639; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
640; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
641; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
642; CHECK:       pred.load.continue8:
643; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
644; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
645; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
646; CHECK:       pred.load.if9:
647; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
648; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
649; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
650; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
651; CHECK:       pred.load.continue10:
652; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
653; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
654; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
655; CHECK:       pred.load.if11:
656; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
657; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
658; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
659; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
660; CHECK:       pred.load.continue12:
661; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
662; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
663; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
664; CHECK:       pred.load.if13:
665; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
666; CHECK-NEXT:    [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
667; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
668; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
669; CHECK:       pred.load.continue14:
670; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
671; CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP23]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
672; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP44]])
673; CHECK-NEXT:    [[TMP46:%.*]] = mul i32 [[TMP45]], [[VEC_PHI]]
674; CHECK-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP43]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
675; CHECK-NEXT:    [[TMP48:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP47]])
676; CHECK-NEXT:    [[TMP49]] = mul i32 [[TMP48]], [[TMP46]]
677; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
678; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
679; CHECK-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
680; CHECK-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
681; CHECK:       middle.block:
682; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
683; CHECK:       scalar.ph:
684; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
685; CHECK:       .lr.ph:
686; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP13:!llvm.loop !.*]]
687; CHECK:       ._crit_edge:
688; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
689; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
690;
691entry:
692  br label %.lr.ph
693
694.lr.ph:                                           ; preds = %entry, %.lr.ph
695  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
; Despite the "sum" name, this phi carries the running product; it starts at 19.
696  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ]
697  %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
698  %l3 = load i32, i32* %l2, align 4
699  %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
700  %l5 = load i32, i32* %l4, align 4
; Two chained multiplies per iteration: first by A[i], then by B[i].
701  %l6 = mul i32 %sum.02, %l3
702  %l7 = mul i32 %l6, %l5
703  %indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the truncated next index equals 257, i.e. after 257 iterations.
704  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
705  %exitcond = icmp eq i32 %lftr.wideiv, 257
706  br i1 %exitcond, label %._crit_edge, label %.lr.ph
707
708._crit_edge:                                      ; preds = %.lr.ph
709  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
710  ret i32 %sum.0.lcssa
711}
712
; reduction_and: bitwise-and reduction. The accumulator starts at -1 (all ones)
; and is and-ed with A[i] and then with B[i] on each of the 257 iterations.
; Unlike the functions above, the pointer arguments are only `nocapture`, not
; `noalias`. The expected output is a predicated vector loop at the vector
; width of 4 forced on the RUN line: lanes are guarded by
; `icmp ult <vector iv>, 257`, loads are emitted as per-lane conditional
; blocks, and inactive lanes are replaced by -1 (the identity for and) before
; each llvm.vector.reduce.and call. The two partial results are chained through
; the scalar accumulator phi, and the vector loop counts up to 260.
713define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) {
714; CHECK-LABEL: @reduction_and(
715; CHECK-NEXT:  entry:
716; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
717; CHECK:       vector.ph:
718; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
719; CHECK:       vector.body:
720; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
721; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
722; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE14]] ]
723; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
724; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
725; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
726; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
727; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
728; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
729; CHECK:       pred.load.if:
730; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
731; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
732; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
733; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
734; CHECK:       pred.load.continue:
735; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
736; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
737; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
738; CHECK:       pred.load.if1:
739; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
740; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
741; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
742; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
743; CHECK:       pred.load.continue2:
744; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
745; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
746; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
747; CHECK:       pred.load.if3:
748; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
749; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
750; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
751; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
752; CHECK:       pred.load.continue4:
753; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
754; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
755; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
756; CHECK:       pred.load.if5:
757; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
758; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
759; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
760; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
761; CHECK:       pred.load.continue6:
762; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
763; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
764; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
765; CHECK:       pred.load.if7:
766; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
767; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
768; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
769; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
770; CHECK:       pred.load.continue8:
771; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
772; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
773; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
774; CHECK:       pred.load.if9:
775; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
776; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
777; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
778; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
779; CHECK:       pred.load.continue10:
780; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
781; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
782; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
783; CHECK:       pred.load.if11:
784; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
785; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
786; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
787; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
788; CHECK:       pred.load.continue12:
789; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
790; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
791; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
792; CHECK:       pred.load.if13:
793; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
794; CHECK-NEXT:    [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
795; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
796; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
797; CHECK:       pred.load.continue14:
798; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
799; CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP23]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
800; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP44]])
801; CHECK-NEXT:    [[TMP46:%.*]] = and i32 [[TMP45]], [[VEC_PHI]]
802; CHECK-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP43]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
803; CHECK-NEXT:    [[TMP48:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP47]])
804; CHECK-NEXT:    [[TMP49]] = and i32 [[TMP48]], [[TMP46]]
805; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
806; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
807; CHECK-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
808; CHECK-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]]
809; CHECK:       middle.block:
810; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
811; CHECK:       scalar.ph:
812; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
813; CHECK:       for.body:
814; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], [[LOOP15:!llvm.loop !.*]]
815; CHECK:       for.end:
816; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
817; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
818;
819entry:
820  br label %for.body
821
822for.body:                                         ; preds = %entry, %for.body
823  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
; Running and-accumulator; starts at -1 (all bits set).
824  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
825  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
826  %l0 = load i32, i32* %arrayidx, align 4
827  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
828  %l1 = load i32, i32* %arrayidx2, align 4
; Despite its name, %add is the result of an `and`; both steps of the chain
; are bitwise-and operations.
829  %add = and i32 %result.08, %l0
830  %and = and i32 %add, %l1
831  %indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the truncated next index equals 257, i.e. after 257 iterations.
832  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
833  %exitcond = icmp eq i32 %lftr.wideiv, 257
834  br i1 %exitcond, label %for.end, label %for.body
835
836for.end:                                          ; preds = %for.body
837  %result.0.lcssa = phi i32 [ %and, %for.body ]
838  ret i32 %result.0.lcssa
839}
840
; reduction_or: bitwise-or reduction. The accumulator starts at 0 and is or-ed
; with the sum A[i] + B[i] on each of the 257 iterations. Only the `or` is part
; of the reduction chain; the `add nsw` just computes the value being reduced.
; The pointer arguments are only `nocapture`, not `noalias`.
; The expected output is a predicated vector loop at the vector width of 4
; forced on the RUN line: lanes are guarded by `icmp ult <vector iv>, 257`,
; loads are emitted as per-lane conditional blocks, the add is performed as a
; vector `add nsw`, and inactive lanes are replaced by 0 (the identity for or)
; before a single llvm.vector.reduce.or call. The vector loop counts up to 260.
841define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) {
842; CHECK-LABEL: @reduction_or(
843; CHECK-NEXT:  entry:
844; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
845; CHECK:       vector.ph:
846; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
847; CHECK:       vector.body:
848; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
849; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
850; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_LOAD_CONTINUE14]] ]
851; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
852; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
853; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
854; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
855; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
856; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
857; CHECK:       pred.load.if:
858; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
859; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
860; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
861; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
862; CHECK:       pred.load.continue:
863; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
864; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
865; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
866; CHECK:       pred.load.if1:
867; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
868; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
869; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
870; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
871; CHECK:       pred.load.continue2:
872; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
873; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
874; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
875; CHECK:       pred.load.if3:
876; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
877; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
878; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
879; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
880; CHECK:       pred.load.continue4:
881; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
882; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
883; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
884; CHECK:       pred.load.if5:
885; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
886; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
887; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
888; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
889; CHECK:       pred.load.continue6:
890; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
891; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
892; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
893; CHECK:       pred.load.if7:
894; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
895; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
896; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
897; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
898; CHECK:       pred.load.continue8:
899; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
900; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
901; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
902; CHECK:       pred.load.if9:
903; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
904; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
905; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
906; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
907; CHECK:       pred.load.continue10:
908; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
909; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
910; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
911; CHECK:       pred.load.if11:
912; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
913; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
914; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
915; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
916; CHECK:       pred.load.continue12:
917; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
918; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
919; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
920; CHECK:       pred.load.if13:
921; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
922; CHECK-NEXT:    [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
923; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
924; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
925; CHECK:       pred.load.continue14:
926; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
927; CHECK-NEXT:    [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP23]]
928; CHECK-NEXT:    [[TMP45:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP44]], <4 x i32> zeroinitializer
929; CHECK-NEXT:    [[TMP46:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP45]])
930; CHECK-NEXT:    [[TMP47]] = or i32 [[TMP46]], [[VEC_PHI]]
931; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
932; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
933; CHECK-NEXT:    [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
934; CHECK-NEXT:    br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP16:!llvm.loop !.*]]
935; CHECK:       middle.block:
936; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
937; CHECK:       scalar.ph:
938; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
939; CHECK:       for.body:
940; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], [[LOOP17:!llvm.loop !.*]]
941; CHECK:       for.end:
942; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP47]], [[MIDDLE_BLOCK]] ]
943; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
944;
945entry:
946  br label %for.body
947
948for.body:                                         ; preds = %entry, %for.body
949  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
; Running or-accumulator; starts at 0.
950  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
951  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
952  %l0 = load i32, i32* %arrayidx, align 4
953  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
954  %l1 = load i32, i32* %arrayidx2, align 4
; The add only produces the per-iteration value; the reduction step is the `or`.
955  %add = add nsw i32 %l1, %l0
956  %or = or i32 %add, %result.08
957  %indvars.iv.next = add i64 %indvars.iv, 1
; Exit once the truncated next index equals 257, i.e. after 257 iterations.
958  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
959  %exitcond = icmp eq i32 %lftr.wideiv, 257
960  br i1 %exitcond, label %for.end, label %for.body
961
962for.end:                                          ; preds = %for.body
963  %result.0.lcssa = phi i32 [ %or, %for.body ]
964  ret i32 %result.0.lcssa
965}
966
; Integer xor reduction over 257 elements: every iteration folds
; (A[i] + B[i]) into the accumulator with xor, starting from 0.
; The FileCheck assertions below expect a tail-folded vector loop at VF=4:
; the lane mask is `icmp ult <vector iv>, 257`, both loads are predicated per
; lane, masked-off lanes are replaced by zero before
; @llvm.vector.reduce.xor, and the reduced scalar is xor'ed into the running
; value inside the vector body (in-loop reduction). The vector loop exits
; when the index reaches 260.
967define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) {
968; CHECK-LABEL: @reduction_xor(
969; CHECK-NEXT:  entry:
970; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
971; CHECK:       vector.ph:
972; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
973; CHECK:       vector.body:
974; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
975; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
976; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_LOAD_CONTINUE14]] ]
977; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
978; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
979; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
980; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
981; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
982; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
983; CHECK:       pred.load.if:
984; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
985; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
986; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
987; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
988; CHECK:       pred.load.continue:
989; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
990; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
991; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
992; CHECK:       pred.load.if1:
993; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]]
994; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4
995; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
996; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
997; CHECK:       pred.load.continue2:
998; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
999; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
1000; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1001; CHECK:       pred.load.if3:
1002; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
1003; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4
1004; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
1005; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1006; CHECK:       pred.load.continue4:
1007; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
1008; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
1009; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
1010; CHECK:       pred.load.if5:
1011; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]]
1012; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
1013; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
1014; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1015; CHECK:       pred.load.continue6:
1016; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
1017; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
1018; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
1019; CHECK:       pred.load.if7:
1020; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
1021; CHECK-NEXT:    [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4
1022; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
1023; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
1024; CHECK:       pred.load.continue8:
1025; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
1026; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
1027; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
1028; CHECK:       pred.load.if9:
1029; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
1030; CHECK-NEXT:    [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4
1031; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
1032; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
1033; CHECK:       pred.load.continue10:
1034; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
1035; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
1036; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
1037; CHECK:       pred.load.if11:
1038; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
1039; CHECK-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4
1040; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
1041; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
1042; CHECK:       pred.load.continue12:
1043; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
1044; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
1045; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
1046; CHECK:       pred.load.if13:
1047; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
1048; CHECK-NEXT:    [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4
1049; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
1050; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
1051; CHECK:       pred.load.continue14:
1052; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
1053; CHECK-NEXT:    [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP23]]
1054; CHECK-NEXT:    [[TMP45:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP44]], <4 x i32> zeroinitializer
1055; CHECK-NEXT:    [[TMP46:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP45]])
1056; CHECK-NEXT:    [[TMP47]] = xor i32 [[TMP46]], [[VEC_PHI]]
1057; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1058; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1059; CHECK-NEXT:    [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1060; CHECK-NEXT:    br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP18:!llvm.loop !.*]]
1061; CHECK:       middle.block:
1062; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1063; CHECK:       scalar.ph:
1064; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1065; CHECK:       for.body:
1066; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], [[LOOP19:!llvm.loop !.*]]
1067; CHECK:       for.end:
1068; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP47]], [[MIDDLE_BLOCK]] ]
1069; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
1070;
1071entry:
1072  br label %for.body
1073
1074for.body:                                         ; preds = %entry, %for.body
1075  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Accumulator: 0 on entry, %xor on the back edge.
1076  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
  ; Load A[i] and B[i], add them, and xor the sum into the accumulator.
1077  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
1078  %l0 = load i32, i32* %arrayidx, align 4
1079  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
1080  %l1 = load i32, i32* %arrayidx2, align 4
1081  %add = add nsw i32 %l1, %l0
1082  %xor = xor i32 %add, %result.08
  ; Exit once the incremented counter, truncated to i32, equals 257; the
  ; trip count (257) is not a multiple of 4, so a tail is always needed.
1083  %indvars.iv.next = add i64 %indvars.iv, 1
1084  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1085  %exitcond = icmp eq i32 %lftr.wideiv, 257
1086  br i1 %exitcond, label %for.end, label %for.body
1087
1088for.end:                                          ; preds = %for.body
1089  %result.0.lcssa = phi i32 [ %xor, %for.body ]
1090  ret i32 %result.0.lcssa
1091}
1092
; Fast-math fadd reduction over 257 elements with two chained fadds per
; iteration: result = (result + A[i]) + B[i], starting from 0.0.
; The FileCheck assertions below expect a tail-folded vector loop at VF=4 in
; which each link of the chain becomes its own in-loop
; @llvm.vector.reduce.fadd call: the A lanes and the B lanes are each passed
; through a select that replaces masked-off lanes with zeroinitializer, then
; reduced and added to the running scalar one after the other.
1093define float @reduction_fadd(float* nocapture %A, float* nocapture %B) {
1094; CHECK-LABEL: @reduction_fadd(
1095; CHECK-NEXT:  entry:
1096; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1097; CHECK:       vector.ph:
1098; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1099; CHECK:       vector.body:
1100; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
1101; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
1102; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE14]] ]
1103; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
1104; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
1105; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
1106; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
1107; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
1108; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1109; CHECK:       pred.load.if:
1110; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1111; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP5]], align 4
1112; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0
1113; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1114; CHECK:       pred.load.continue:
1115; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
1116; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
1117; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1118; CHECK:       pred.load.if1:
1119; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
1120; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[TMP10]], align 4
1121; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP11]], i32 1
1122; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1123; CHECK:       pred.load.continue2:
1124; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
1125; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
1126; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1127; CHECK:       pred.load.if3:
1128; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
1129; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP15]], align 4
1130; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP16]], i32 2
1131; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1132; CHECK:       pred.load.continue4:
1133; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x float> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
1134; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
1135; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
1136; CHECK:       pred.load.if5:
1137; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
1138; CHECK-NEXT:    [[TMP21:%.*]] = load float, float* [[TMP20]], align 4
1139; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP21]], i32 3
1140; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1141; CHECK:       pred.load.continue6:
1142; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
1143; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
1144; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
1145; CHECK:       pred.load.if7:
1146; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
1147; CHECK-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP25]], align 4
1148; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> poison, float [[TMP26]], i32 0
1149; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
1150; CHECK:       pred.load.continue8:
1151; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x float> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
1152; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
1153; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
1154; CHECK:       pred.load.if9:
1155; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
1156; CHECK-NEXT:    [[TMP31:%.*]] = load float, float* [[TMP30]], align 4
1157; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP31]], i32 1
1158; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
1159; CHECK:       pred.load.continue10:
1160; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
1161; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
1162; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
1163; CHECK:       pred.load.if11:
1164; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP1]]
1165; CHECK-NEXT:    [[TMP36:%.*]] = load float, float* [[TMP35]], align 4
1166; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP36]], i32 2
1167; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
1168; CHECK:       pred.load.continue12:
1169; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x float> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
1170; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
1171; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
1172; CHECK:       pred.load.if13:
1173; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP2]]
1174; CHECK-NEXT:    [[TMP41:%.*]] = load float, float* [[TMP40]], align 4
1175; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP41]], i32 3
1176; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
1177; CHECK:       pred.load.continue14:
1178; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x float> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
1179; CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP23]], <4 x float> zeroinitializer
1180; CHECK-NEXT:    [[TMP45:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP44]])
1181; CHECK-NEXT:    [[TMP46:%.*]] = fadd float [[TMP45]], [[VEC_PHI]]
1182; CHECK-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP43]], <4 x float> zeroinitializer
1183; CHECK-NEXT:    [[TMP48:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP47]])
1184; CHECK-NEXT:    [[TMP49]] = fadd float [[TMP48]], [[TMP46]]
1185; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1186; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1187; CHECK-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1188; CHECK-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP20:!llvm.loop !.*]]
1189; CHECK:       middle.block:
1190; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1191; CHECK:       scalar.ph:
1192; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1193; CHECK:       for.body:
1194; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], [[LOOP21:!llvm.loop !.*]]
1195; CHECK:       for.end:
1196; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
1197; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
1198;
1199entry:
1200  br label %for.body
1201
1202for.body:                                         ; preds = %entry, %for.body
1203  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Accumulator: 0.0 on entry, %fadd on the back edge.
1204  %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
1205  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
1206  %l0 = load float, float* %arrayidx, align 4
1207  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
1208  %l1 = load float, float* %arrayidx2, align 4
  ; Two-link reduction chain: first add A[i] to the accumulator, then B[i].
1209  %add = fadd fast float %result.08, %l0
1210  %fadd = fadd fast float %add, %l1
  ; Exit once the incremented counter, truncated to i32, equals 257.
1211  %indvars.iv.next = add i64 %indvars.iv, 1
1212  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1213  %exitcond = icmp eq i32 %lftr.wideiv, 257
1214  br i1 %exitcond, label %for.end, label %for.body
1215
1216for.end:                                          ; preds = %for.body
1217  %result.0.lcssa = phi float [ %fadd, %for.body ]
1218  ret float %result.0.lcssa
1219}
1220
; Fast-math fmul reduction over 257 elements with two chained fmuls per
; iteration: result = (result * A[i]) * B[i].
; The FileCheck assertions below expect a tail-folded vector loop at VF=4 in
; which each link of the chain becomes its own in-loop
; @llvm.vector.reduce.fmul call (start operand 1.0), with masked-off lanes
; replaced by a splat of 1.0 before each reduction.
; NOTE(review): the accumulator phi starts at 0.0, not 1.0, and the expected
; vector phi mirrors that; presumably irrelevant to what is being tested --
; confirm before changing, since the assertions would need regenerating.
1221define float @reduction_fmul(float* nocapture %A, float* nocapture %B) {
1222; CHECK-LABEL: @reduction_fmul(
1223; CHECK-NEXT:  entry:
1224; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1225; CHECK:       vector.ph:
1226; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1227; CHECK:       vector.body:
1228; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
1229; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
1230; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_LOAD_CONTINUE14]] ]
1231; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
1232; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
1233; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
1234; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
1235; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
1236; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1237; CHECK:       pred.load.if:
1238; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1239; CHECK-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP5]], align 4
1240; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i32 0
1241; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1242; CHECK:       pred.load.continue:
1243; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
1244; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
1245; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1246; CHECK:       pred.load.if1:
1247; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
1248; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[TMP10]], align 4
1249; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP11]], i32 1
1250; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1251; CHECK:       pred.load.continue2:
1252; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
1253; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
1254; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1255; CHECK:       pred.load.if3:
1256; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP1]]
1257; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP15]], align 4
1258; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP16]], i32 2
1259; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1260; CHECK:       pred.load.continue4:
1261; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x float> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
1262; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
1263; CHECK-NEXT:    br i1 [[TMP19]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
1264; CHECK:       pred.load.if5:
1265; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
1266; CHECK-NEXT:    [[TMP21:%.*]] = load float, float* [[TMP20]], align 4
1267; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP21]], i32 3
1268; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1269; CHECK:       pred.load.continue6:
1270; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
1271; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
1272; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
1273; CHECK:       pred.load.if7:
1274; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
1275; CHECK-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP25]], align 4
1276; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> poison, float [[TMP26]], i32 0
1277; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
1278; CHECK:       pred.load.continue8:
1279; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x float> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP27]], [[PRED_LOAD_IF7]] ]
1280; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
1281; CHECK-NEXT:    br i1 [[TMP29]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
1282; CHECK:       pred.load.if9:
1283; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP0]]
1284; CHECK-NEXT:    [[TMP31:%.*]] = load float, float* [[TMP30]], align 4
1285; CHECK-NEXT:    [[TMP32:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP31]], i32 1
1286; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE10]]
1287; CHECK:       pred.load.continue10:
1288; CHECK-NEXT:    [[TMP33:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP32]], [[PRED_LOAD_IF9]] ]
1289; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
1290; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
1291; CHECK:       pred.load.if11:
1292; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP1]]
1293; CHECK-NEXT:    [[TMP36:%.*]] = load float, float* [[TMP35]], align 4
1294; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP36]], i32 2
1295; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE12]]
1296; CHECK:       pred.load.continue12:
1297; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x float> [ [[TMP33]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP37]], [[PRED_LOAD_IF11]] ]
1298; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
1299; CHECK-NEXT:    br i1 [[TMP39]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
1300; CHECK:       pred.load.if13:
1301; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP2]]
1302; CHECK-NEXT:    [[TMP41:%.*]] = load float, float* [[TMP40]], align 4
1303; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP41]], i32 3
1304; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE14]]
1305; CHECK:       pred.load.continue14:
1306; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x float> [ [[TMP38]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP42]], [[PRED_LOAD_IF13]] ]
1307; CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP23]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
1308; CHECK-NEXT:    [[TMP45:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP44]])
1309; CHECK-NEXT:    [[TMP46:%.*]] = fmul float [[TMP45]], [[VEC_PHI]]
1310; CHECK-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP43]], <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
1311; CHECK-NEXT:    [[TMP48:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP47]])
1312; CHECK-NEXT:    [[TMP49]] = fmul float [[TMP48]], [[TMP46]]
1313; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1314; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1315; CHECK-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1316; CHECK-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP22:!llvm.loop !.*]]
1317; CHECK:       middle.block:
1318; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1319; CHECK:       scalar.ph:
1320; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1321; CHECK:       for.body:
1322; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], [[LOOP23:!llvm.loop !.*]]
1323; CHECK:       for.end:
1324; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
1325; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
1326;
1327entry:
1328  br label %for.body
1329
1330for.body:                                         ; preds = %entry, %for.body
1331  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Accumulator: 0.0 on entry (see the note above the function), %fmul on
  ; the back edge.
1332  %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
1333  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
1334  %l0 = load float, float* %arrayidx, align 4
1335  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
1336  %l1 = load float, float* %arrayidx2, align 4
  ; Two-link reduction chain. Despite its name, %add is a multiply.
1337  %add = fmul fast float %result.08, %l0
1338  %fmul = fmul fast float %add, %l1
  ; Exit once the incremented counter, truncated to i32, equals 257.
1339  %indvars.iv.next = add i64 %indvars.iv, 1
1340  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1341  %exitcond = icmp eq i32 %lftr.wideiv, 257
1342  br i1 %exitcond, label %for.end, label %for.body
1343
1344for.end:                                          ; preds = %for.body
1345  %result.0.lcssa = phi float [ %fmul, %for.body ]
1346  ret float %result.0.lcssa
1347}
1348
; Signed-minimum reduction over 257 elements of A, written as icmp slt +
; select, with the accumulator starting at 1000. %B is declared but never
; used by the loop body.
; The FileCheck assertions below expect a tail-folded vector loop at VF=4:
; the loads of A are predicated per lane, masked-off lanes are replaced by
; 2147483647 (the largest i32) before @llvm.vector.reduce.smin, and the
; reduced scalar is combined with the running value by an icmp slt + select
; inside the vector body.
1349define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) {
1350; CHECK-LABEL: @reduction_min(
1351; CHECK-NEXT:  entry:
1352; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1353; CHECK:       vector.ph:
1354; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1355; CHECK:       vector.body:
1356; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1357; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1358; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1359; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
1360; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1361; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1362; CHECK:       pred.load.if:
1363; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
1364; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
1365; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
1366; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1367; CHECK:       pred.load.continue:
1368; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
1369; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
1370; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1371; CHECK:       pred.load.if1:
1372; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
1373; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
1374; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
1375; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 1
1376; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1377; CHECK:       pred.load.continue2:
1378; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
1379; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
1380; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1381; CHECK:       pred.load.if3:
1382; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
1383; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
1384; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
1385; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i32 2
1386; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1387; CHECK:       pred.load.continue4:
1388; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
1389; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
1390; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1391; CHECK:       pred.load.if5:
1392; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[INDEX]], 3
1393; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
1394; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
1395; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i32 3
1396; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1397; CHECK:       pred.load.continue6:
1398; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
1399; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
1400; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
1401; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP25]], [[VEC_PHI]]
1402; CHECK-NEXT:    [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP25]], i32 [[VEC_PHI]]
1403; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1404; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1405; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1406; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP24:!llvm.loop !.*]]
1407; CHECK:       middle.block:
1408; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1409; CHECK:       scalar.ph:
1410; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1411; CHECK:       for.body:
1412; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], [[LOOP25:!llvm.loop !.*]]
1413; CHECK:       for.end:
1414; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
1415; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
1416;
1417entry:
1418  br label %for.body
1419
1420for.body:                                         ; preds = %entry, %for.body
1421  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Running minimum: 1000 on entry, %v0 on the back edge.
1422  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
1423  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
1424  %l0 = load i32, i32* %arrayidx, align 4
  ; Keep the accumulator when it is strictly smaller than A[i], otherwise
  ; take A[i] (signed comparison).
1425  %c0 = icmp slt i32 %result.08, %l0
1426  %v0 = select i1 %c0, i32 %result.08, i32 %l0
  ; Exit once the incremented counter, truncated to i32, equals 257.
1427  %indvars.iv.next = add i64 %indvars.iv, 1
1428  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1429  %exitcond = icmp eq i32 %lftr.wideiv, 257
1430  br i1 %exitcond, label %for.end, label %for.body
1431
1432for.end:                                          ; preds = %for.body
1433  %result.0.lcssa = phi i32 [ %v0, %for.body ]
1434  ret i32 %result.0.lcssa
1435}
1436
; Unsigned-max reduction over i32 elements of %A (%B is never used).
; The scalar loop keeps the larger, by unsigned compare, of the running value
; (seeded with 1000) and the loaded element, and exits once the truncated
; induction variable reaches 257. With the vector width of 4 forced by the opt
; invocation at the top of the file, the expected output folds the tail: lanes
; are masked with `icmp ult ..., 257`, every lane's load is predicated, masked-off
; lanes are replaced by 0 before llvm.vector.reduce.umax, the reduced value is
; merged into a scalar accumulator inside the vector body, and the vector loop
; ends at index 260.
1437define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) {
1438; CHECK-LABEL: @reduction_max(
1439; CHECK-NEXT:  entry:
1440; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1441; CHECK:       vector.ph:
1442; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1443; CHECK:       vector.body:
1444; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1445; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1446; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1447; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], <i64 257, i64 257, i64 257, i64 257>
1448; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1449; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1450; CHECK:       pred.load.if:
1451; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
1452; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
1453; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i32 0
1454; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1455; CHECK:       pred.load.continue:
1456; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
1457; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
1458; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1459; CHECK:       pred.load.if1:
1460; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
1461; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
1462; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4
1463; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i32 1
1464; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1465; CHECK:       pred.load.continue2:
1466; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
1467; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
1468; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1469; CHECK:       pred.load.if3:
1470; CHECK-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 2
1471; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
1472; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
1473; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i32 2
1474; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1475; CHECK:       pred.load.continue4:
1476; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
1477; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
1478; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1479; CHECK:       pred.load.if5:
1480; CHECK-NEXT:    [[TMP19:%.*]] = or i64 [[INDEX]], 3
1481; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
1482; CHECK-NEXT:    [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4
1483; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i32 3
1484; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1485; CHECK:       pred.load.continue6:
1486; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
1487; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
1488; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
1489; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt i32 [[TMP25]], [[VEC_PHI]]
1490; CHECK-NEXT:    [[RDX_MINMAX_SELECT]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP25]], i32 [[VEC_PHI]]
1491; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1492; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
1493; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1494; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP26:!llvm.loop !.*]]
1495; CHECK:       middle.block:
1496; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1497; CHECK:       scalar.ph:
1498; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1499; CHECK:       for.body:
1500; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], [[LOOP27:!llvm.loop !.*]]
1501; CHECK:       for.end:
1502; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
1503; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
1504;
1505entry:
1506  br label %for.body
1507
1508for.body:                                         ; preds = %entry, %for.body
1509  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
1510  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
1511  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
1512  %l0 = load i32, i32* %arrayidx, align 4
  ; Unsigned compare + select: keep the running value when it is larger,
  ; otherwise take the freshly loaded element.
1513  %c0 = icmp ugt i32 %result.08, %l0
1514  %v0 = select i1 %c0, i32 %result.08, i32 %l0
1515  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test is performed on the i32 truncation of the i64 counter.
1516  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1517  %exitcond = icmp eq i32 %lftr.wideiv, 257
1518  br i1 %exitcond, label %for.end, label %for.body
1519
1520for.end:                                          ; preds = %for.body
1521  %result.0.lcssa = phi i32 [ %v0, %for.body ]
1522  ret i32 %result.0.lcssa
1523}
1524
1525; Conditional reductions with multi-input phis.
; The running float sum (seeded with %S) is updated on only two of the four
; paths through the loop body: one adds %l0, the other adds %l1, and the
; remaining two forward the sum unchanged. The loop runs while the truncated
; induction variable differs from 128. In the expected output the accumulator
; stays a <4 x float> vector, the paths are blended with selects, no lane mask
; is emitted, the vector loop ends at index 128, and the single
; llvm.vector.reduce.fadd call sits in middle.block rather than in the loop.
1526define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
1527; CHECK-LABEL: @reduction_conditional(
1528; CHECK-NEXT:  entry:
1529; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1530; CHECK:       vector.ph:
1531; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[S:%.*]], i32 0
1532; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1533; CHECK:       vector.body:
1534; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1535; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
1536; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1537; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>*
1538; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
1539; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
1540; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
1541; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
1542; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
1543; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
1544; CHECK-NEXT:    [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
1545; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
1546; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP8]], [[TMP7]]
1547; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
1548; CHECK-NEXT:    [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]]
1549; CHECK-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
1550; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
1551; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
1552; CHECK-NEXT:    [[TMP13:%.*]] = or <4 x i1> [[TMP11]], [[TMP12]]
1553; CHECK-NEXT:    [[PREDPHI3]] = select <4 x i1> [[TMP13]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
1554; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1555; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
1556; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP28:!llvm.loop !.*]]
1557; CHECK:       middle.block:
1558; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[PREDPHI3]])
1559; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1560; CHECK:       scalar.ph:
1561; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1562; CHECK:       for.body:
1563; CHECK-NEXT:    br i1 undef, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
1564; CHECK:       if.then:
1565; CHECK-NEXT:    br i1 undef, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]]
1566; CHECK:       if.then8:
1567; CHECK-NEXT:    br label [[FOR_INC]]
1568; CHECK:       if.else:
1569; CHECK-NEXT:    br i1 undef, label [[IF_THEN16:%.*]], label [[FOR_INC]]
1570; CHECK:       if.then16:
1571; CHECK-NEXT:    br label [[FOR_INC]]
1572; CHECK:       for.inc:
1573; CHECK-NEXT:    br i1 undef, label [[FOR_BODY]], label [[FOR_END]], [[LOOP29:!llvm.loop !.*]]
1574; CHECK:       for.end:
1575; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ undef, [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
1576; CHECK-NEXT:    ret float [[SUM_1_LCSSA]]
1577;
1578entry:
1579  br label %for.body
1580
1581for.body:
1582  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
1583  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
1584  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
1585  %l0 = load float, float* %arrayidx, align 4
1586  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
1587  %l1 = load float, float* %arrayidx2, align 4
  ; When A[i] is not greater than B[i] the sum is left untouched.
1588  %cmp3 = fcmp ogt float %l0, %l1
1589  br i1 %cmp3, label %if.then, label %for.inc
1590
1591if.then:
1592  %cmp6 = fcmp ogt float %l1, 1.000000e+00
1593  br i1 %cmp6, label %if.then8, label %if.else
1594
1595if.then8:
1596  %add = fadd fast float %sum.033, %l0
1597  br label %for.inc
1598
1599if.else:
1600  %cmp14 = fcmp ogt float %l0, 2.000000e+00
1601  br i1 %cmp14, label %if.then16, label %for.inc
1602
1603if.then16:
1604  %add19 = fadd fast float %sum.033, %l1
1605  br label %for.inc
1606
1607for.inc:
  ; Four-way merge: two edges carry an updated sum, the other two forward
  ; %sum.033 unchanged.
1608  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]
1609  %indvars.iv.next = add i64 %indvars.iv, 1
1610  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1611  %exitcond = icmp ne i32 %lftr.wideiv, 128
1612  br i1 %exitcond, label %for.body, label %for.end
1613
1614for.end:
1615  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
1616  ret float %sum.1.lcssa
1617}
1618
; Add reduction carried in an i32 whose value is masked to its low 8 bits at
; the top of every iteration and truncated to i8 after the loop. The start
; value is 255, each addend is an i8 loaded from %A and zero-extended, and the
; loop exits when the i32 counter reaches 257. The expected output folds the
; tail (mask `icmp ult ..., 257`, vector loop ends at index 260), keeps a
; <4 x i32> accumulator in the loop, and in middle.block selects the previous
; accumulator for masked-off lanes, truncates to <4 x i8> and reduces with
; llvm.vector.reduce.add.v4i8, so the i8 result is returned directly.
1619define i8 @reduction_add_trunc(i8* noalias nocapture %A) {
1620; CHECK-LABEL: @reduction_add_trunc(
1621; CHECK-NEXT:  entry:
1622; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1623; CHECK:       vector.ph:
1624; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1625; CHECK:       vector.body:
1626; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1627; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1628; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1629; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
1630; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
1631; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1632; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1633; CHECK:       pred.load.if:
1634; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[INDEX]] to i64
1635; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP3]]
1636; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 4
1637; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP5]], i32 0
1638; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1639; CHECK:       pred.load.continue:
1640; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
1641; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
1642; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1643; CHECK:       pred.load.if1:
1644; CHECK-NEXT:    [[TMP9:%.*]] = or i32 [[INDEX]], 1
1645; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
1646; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP10]]
1647; CHECK-NEXT:    [[TMP12:%.*]] = load i8, i8* [[TMP11]], align 4
1648; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP12]], i32 1
1649; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1650; CHECK:       pred.load.continue2:
1651; CHECK-NEXT:    [[TMP14:%.*]] = phi <4 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ]
1652; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
1653; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1654; CHECK:       pred.load.if3:
1655; CHECK-NEXT:    [[TMP16:%.*]] = or i32 [[INDEX]], 2
1656; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
1657; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP17]]
1658; CHECK-NEXT:    [[TMP19:%.*]] = load i8, i8* [[TMP18]], align 4
1659; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP19]], i32 2
1660; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1661; CHECK:       pred.load.continue4:
1662; CHECK-NEXT:    [[TMP21:%.*]] = phi <4 x i8> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ]
1663; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
1664; CHECK-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1665; CHECK:       pred.load.if5:
1666; CHECK-NEXT:    [[TMP23:%.*]] = or i32 [[INDEX]], 3
1667; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
1668; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP24]]
1669; CHECK-NEXT:    [[TMP26:%.*]] = load i8, i8* [[TMP25]], align 4
1670; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP26]], i32 3
1671; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1672; CHECK:       pred.load.continue6:
1673; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
1674; CHECK-NEXT:    [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
1675; CHECK-NEXT:    [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]]
1676; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
1677; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
1678; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
1679; CHECK-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP30:!llvm.loop !.*]]
1680; CHECK:       middle.block:
1681; CHECK-NEXT:    [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP30]], <4 x i32> [[VEC_PHI]]
1682; CHECK-NEXT:    [[TMP33:%.*]] = trunc <4 x i32> [[TMP32]] to <4 x i8>
1683; CHECK-NEXT:    [[TMP34:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP33]])
1684; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
1685; CHECK:       scalar.ph:
1686; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
1687; CHECK:       .lr.ph:
1688; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP31:!llvm.loop !.*]]
1689; CHECK:       ._crit_edge:
1690; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i8 [ undef, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
1691; CHECK-NEXT:    ret i8 [[SUM_0_LCSSA]]
1692;
1693entry:
1694  br label %.lr.ph
1695
1696.lr.ph:                                           ; preds = %entry, %.lr.ph
1697  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
1698  %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
  ; Only the low 8 bits of the carried sum take part in the next addition.
1699  %sum.02 = and i32 %sum.02p, 255
1700  %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv
1701  %l3 = load i8, i8* %l2, align 4
1702  %l3e = zext i8 %l3 to i32
1703  %l9 = add i32 %sum.02, %l3e
1704  %indvars.iv.next = add i32 %indvars.iv, 1
1705  %exitcond = icmp eq i32 %indvars.iv.next, 257
1706  br i1 %exitcond, label %._crit_edge, label %.lr.ph
1707
1708._crit_edge:                                      ; preds = %.lr.ph
1709  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ; The caller only ever sees the low byte of the accumulated value.
1710  %ret = trunc i32 %sum.0.lcssa to i8
1711  ret i8 %ret
1712}
1713
1714
; Bitwise-and reduction carried in an i32 that is masked with 255 at the top of
; every iteration and truncated to i8 after the loop. The start value is 255,
; each operand is an i8 loaded from %A and zero-extended, and the loop exits
; when the i32 counter reaches 257. The expected output folds the tail (mask
; `icmp ult ..., 257`, vector loop ends at index 260) and keeps a scalar i32
; accumulator inside the vector body, updated by two llvm.vector.reduce.and
; calls per iteration: one over the constant 255 and one over the loaded
; values, with masked-off lanes replaced by -1 in both. The trunc to i8 stays
; scalar, after the loop.
1715define i8 @reduction_and_trunc(i8* noalias nocapture %A) {
1716; CHECK-LABEL: @reduction_and_trunc(
1717; CHECK-NEXT:  entry:
1718; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1719; CHECK:       vector.ph:
1720; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1721; CHECK:       vector.body:
1722; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1723; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1724; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 255, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1725; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], <i32 257, i32 257, i32 257, i32 257>
1726; CHECK-NEXT:    [[TMP1:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 255, i32 255, i32 255, i32 255>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
1727; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP1]])
1728; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[VEC_PHI]]
1729; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
1730; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1731; CHECK:       pred.load.if:
1732; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[INDEX]] to i64
1733; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP5]]
1734; CHECK-NEXT:    [[TMP7:%.*]] = load i8, i8* [[TMP6]], align 4
1735; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i8> poison, i8 [[TMP7]], i32 0
1736; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1737; CHECK:       pred.load.continue:
1738; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
1739; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
1740; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1741; CHECK:       pred.load.if1:
1742; CHECK-NEXT:    [[TMP11:%.*]] = or i32 [[INDEX]], 1
1743; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
1744; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP12]]
1745; CHECK-NEXT:    [[TMP14:%.*]] = load i8, i8* [[TMP13]], align 4
1746; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i8> [[TMP9]], i8 [[TMP14]], i32 1
1747; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1748; CHECK:       pred.load.continue2:
1749; CHECK-NEXT:    [[TMP16:%.*]] = phi <4 x i8> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP15]], [[PRED_LOAD_IF1]] ]
1750; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
1751; CHECK-NEXT:    br i1 [[TMP17]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1752; CHECK:       pred.load.if3:
1753; CHECK-NEXT:    [[TMP18:%.*]] = or i32 [[INDEX]], 2
1754; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
1755; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP19]]
1756; CHECK-NEXT:    [[TMP21:%.*]] = load i8, i8* [[TMP20]], align 4
1757; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP21]], i32 2
1758; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1759; CHECK:       pred.load.continue4:
1760; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i8> [ [[TMP16]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP22]], [[PRED_LOAD_IF3]] ]
1761; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
1762; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1763; CHECK:       pred.load.if5:
1764; CHECK-NEXT:    [[TMP25:%.*]] = or i32 [[INDEX]], 3
1765; CHECK-NEXT:    [[TMP26:%.*]] = sext i32 [[TMP25]] to i64
1766; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[TMP26]]
1767; CHECK-NEXT:    [[TMP28:%.*]] = load i8, i8* [[TMP27]], align 4
1768; CHECK-NEXT:    [[TMP29:%.*]] = insertelement <4 x i8> [[TMP23]], i8 [[TMP28]], i32 3
1769; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1770; CHECK:       pred.load.continue6:
1771; CHECK-NEXT:    [[TMP30:%.*]] = phi <4 x i8> [ [[TMP23]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP29]], [[PRED_LOAD_IF5]] ]
1772; CHECK-NEXT:    [[TMP31:%.*]] = zext <4 x i8> [[TMP30]] to <4 x i32>
1773; CHECK-NEXT:    [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP31]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
1774; CHECK-NEXT:    [[TMP33:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP32]])
1775; CHECK-NEXT:    [[TMP34]] = and i32 [[TMP33]], [[TMP3]]
1776; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
1777; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
1778; CHECK-NEXT:    [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
1779; CHECK-NEXT:    br i1 [[TMP35]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP32:!llvm.loop !.*]]
1780; CHECK:       middle.block:
1781; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
1782; CHECK:       scalar.ph:
1783; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
1784; CHECK:       .lr.ph:
1785; CHECK-NEXT:    br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], [[LOOP33:!llvm.loop !.*]]
1786; CHECK:       ._crit_edge:
1787; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
1788; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[SUM_0_LCSSA]] to i8
1789; CHECK-NEXT:    ret i8 [[RET]]
1790;
1791entry:
1792  br label %.lr.ph
1793
1794.lr.ph:                                           ; preds = %entry, %.lr.ph
1795  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
1796  %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
  ; Mask the carried value to its low 8 bits before combining it with the
  ; next element.
1797  %sum.02 = and i32 %sum.02p, 255
1798  %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv
1799  %l3 = load i8, i8* %l2, align 4
1800  %l3e = zext i8 %l3 to i32
1801  %l9 = and i32 %sum.02, %l3e
1802  %indvars.iv.next = add i32 %indvars.iv, 1
1803  %exitcond = icmp eq i32 %indvars.iv.next, 257
1804  br i1 %exitcond, label %._crit_edge, label %.lr.ph
1805
1806._crit_edge:                                      ; preds = %.lr.ph
1807  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ; Only the low byte of the reduced value is returned.
1808  %ret = trunc i32 %sum.0.lcssa to i8
1809  ret i8 %ret
1810}
1811