xref: /llvm-project/llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll (revision 82821254f532c1dbdfd5d985ef7130511efaaa83)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck %s
3
4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5
; Test: single in-loop integer add reduction with a folded (predicated) tail.
; Input: a scalar loop that sums the i32 elements of %A; the accumulator
; starts at 0 and the loop body executes 257 times.
; Expected output (autogenerated assertions below): the tail is folded into
; the vector body, so each of the 4 lanes is loaded under its own bit of the
; `icmp ult ..., 257` lane mask (pred.load.if* blocks), masked-off lanes are
; replaced by 0 through a select, and the vector is reduced inside the loop
; with llvm.vector.reduce.add and added to a scalar i32 accumulator phi.
; The vector loop exits once the index reaches 260.
6define i32 @reduction_sum_single(ptr noalias nocapture %A) {
7; CHECK-LABEL: @reduction_sum_single(
8; CHECK-NEXT:  entry:
9; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
10; CHECK:       vector.ph:
11; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
12; CHECK:       vector.body:
13; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
14; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
15; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_LOAD_CONTINUE6]] ]
16; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
17; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
18; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
19; CHECK:       pred.load.if:
20; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
21; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
22; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
23; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
24; CHECK:       pred.load.continue:
25; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
26; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
27; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
28; CHECK:       pred.load.if1:
29; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1
30; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
31; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
32; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
33; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
34; CHECK:       pred.load.continue2:
35; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
36; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
37; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
38; CHECK:       pred.load.if3:
39; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 2
40; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
41; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
42; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
43; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
44; CHECK:       pred.load.continue4:
45; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
46; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
47; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
48; CHECK:       pred.load.if5:
49; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 3
50; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
51; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
52; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
53; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
54; CHECK:       pred.load.continue6:
55; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
56; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
57; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
58; CHECK-NEXT:    [[TMP26]] = add i32 [[TMP25]], [[VEC_PHI]]
59; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
60; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
61; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
62; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
63; CHECK:       middle.block:
64; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
65; CHECK:       scalar.ph:
66; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
67; CHECK:       .lr.ph:
68; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
69; CHECK:       ._crit_edge:
70; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
71; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
72;
; Scalar input loop follows: sum += A[i].
73entry:
74  br label %.lr.ph
75
76.lr.ph:                                           ; preds = %entry, %.lr.ph
77  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
78  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
79  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
80  %l3 = load i32, ptr %l2, align 4
; Accumulate the loaded element into the running sum (the reduction value).
81  %l7 = add i32 %sum.02, %l3
82  %indvars.iv.next = add i64 %indvars.iv, 1
; The incremented 64-bit counter is truncated to i32 and compared with 257,
; so the loop exits after the 257th iteration.
83  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
84  %exitcond = icmp eq i32 %lftr.wideiv, 257
85  br i1 %exitcond, label %._crit_edge, label %.lr.ph
86
87._crit_edge:                                      ; preds = %.lr.ph
88  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
89  ret i32 %sum.0.lcssa
90}
91
; Test: a chain of three adds feeding one in-loop add reduction, with a
; folded (predicated) tail.
; Input: a scalar loop that adds, per iteration, the truncated induction
; variable, A[i] and B[i] to an i32 accumulator starting at 0; the loop body
; executes 257 times.
; Expected output (autogenerated assertions below): both loads are emitted
; per lane under the `icmp ult ..., 257` lane mask (pred.load.if* blocks),
; and each of the three addends (the <4 x i32> induction vector, the A vector
; and the B vector) is masked with a select against zero, reduced with its
; own llvm.vector.reduce.add call, and chained onto the scalar accumulator.
; The vector loop exits once the index reaches 260.
92define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
93; CHECK-LABEL: @reduction_sum(
94; CHECK-NEXT:  entry:
95; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
96; CHECK:       vector.ph:
97; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
98; CHECK:       vector.body:
99; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
100; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
101; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
102; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
103; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
104; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
105; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
106; CHECK:       pred.load.if:
107; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
108; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
109; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
110; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
111; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
112; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
113; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
114; CHECK:       pred.load.continue:
115; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
116; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
117; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
118; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
119; CHECK:       pred.load.if3:
120; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
121; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
122; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
123; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
124; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
125; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
126; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
127; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
128; CHECK:       pred.load.continue4:
129; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
130; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
131; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
132; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
133; CHECK:       pred.load.if5:
134; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
135; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
136; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
137; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
138; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
139; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
140; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
141; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
142; CHECK:       pred.load.continue6:
143; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
144; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
145; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
146; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
147; CHECK:       pred.load.if7:
148; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
149; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
150; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
151; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
152; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
153; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
154; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
155; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
156; CHECK:       pred.load.continue8:
157; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
158; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
159; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
160; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]])
161; CHECK-NEXT:    [[TMP42:%.*]] = add i32 [[TMP41]], [[VEC_PHI]]
162; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> zeroinitializer
163; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]])
164; CHECK-NEXT:    [[TMP45:%.*]] = add i32 [[TMP44]], [[TMP42]]
165; CHECK-NEXT:    [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> zeroinitializer
166; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP46]])
167; CHECK-NEXT:    [[TMP48]] = add i32 [[TMP47]], [[TMP45]]
168; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
169; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
170; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
171; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
172; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
173; CHECK:       middle.block:
174; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
175; CHECK:       scalar.ph:
176; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
177; CHECK:       .lr.ph:
178; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]]
179; CHECK:       ._crit_edge:
180; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ]
181; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
182;
; Scalar input loop follows: sum += (i32)i + A[i] + B[i].
183entry:
184  br label %.lr.ph
185
186.lr.ph:                                           ; preds = %entry, %.lr.ph
187  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
188  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
189  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
190  %l3 = load i32, ptr %l2, align 4
191  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
192  %l5 = load i32, ptr %l4, align 4
; The induction variable itself is one of the addends, narrowed to i32.
193  %l6 = trunc i64 %indvars.iv to i32
; Three chained adds: accumulator + index, then + A[i], then + B[i].
194  %l7 = add i32 %sum.02, %l6
195  %l8 = add i32 %l7, %l3
196  %l9 = add i32 %l8, %l5
197  %indvars.iv.next = add i64 %indvars.iv, 1
; The incremented 64-bit counter is truncated to i32 and compared with 257,
; so the loop exits after the 257th iteration.
198  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
199  %exitcond = icmp eq i32 %lftr.wideiv, 257
200  br i1 %exitcond, label %._crit_edge, label %.lr.ph
201
202._crit_edge:                                      ; preds = %.lr.ph
203  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
204  ret i32 %sum.0.lcssa
205}
206
; Test: in-loop add reduction where one addend is a loop-invariant constant,
; with a folded (predicated) tail.
; Input: a scalar loop that adds A[i] and the constant 3 to an i32
; accumulator starting at 0; the loop body executes 257 times.
; Expected output (autogenerated assertions below): A is loaded per lane
; under the `icmp ult ..., 257` lane mask (pred.load.if* blocks); the loaded
; vector and a splat of 3 are each masked with a select against zero, reduced
; with llvm.vector.reduce.add, and chained onto the scalar accumulator, so
; the constant is only counted for active lanes.
; The vector loop exits once the index reaches 260.
207define i32 @reduction_sum_const(ptr noalias nocapture %A) {
208; CHECK-LABEL: @reduction_sum_const(
209; CHECK-NEXT:  entry:
210; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
211; CHECK:       vector.ph:
212; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
213; CHECK:       vector.body:
214; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
215; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
216; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
217; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
218; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
219; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
220; CHECK:       pred.load.if:
221; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
222; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
223; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
224; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
225; CHECK:       pred.load.continue:
226; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
227; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
228; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
229; CHECK:       pred.load.if1:
230; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1
231; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
232; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
233; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
234; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
235; CHECK:       pred.load.continue2:
236; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
237; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
238; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
239; CHECK:       pred.load.if3:
240; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 2
241; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
242; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
243; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
244; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
245; CHECK:       pred.load.continue4:
246; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
247; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
248; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
249; CHECK:       pred.load.if5:
250; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 3
251; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
252; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
253; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
254; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
255; CHECK:       pred.load.continue6:
256; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
257; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
258; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
259; CHECK-NEXT:    [[TMP26:%.*]] = add i32 [[TMP25]], [[VEC_PHI]]
260; CHECK-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 3), <4 x i32> zeroinitializer
261; CHECK-NEXT:    [[TMP28:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP27]])
262; CHECK-NEXT:    [[TMP29]] = add i32 [[TMP28]], [[TMP26]]
263; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
264; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
265; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
266; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
267; CHECK:       middle.block:
268; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
269; CHECK:       scalar.ph:
270; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
271; CHECK:       .lr.ph:
272; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
273; CHECK:       ._crit_edge:
274; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP29]], [[MIDDLE_BLOCK]] ]
275; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
276;
; Scalar input loop follows: sum += A[i] + 3.
277entry:
278  br label %.lr.ph
279
280.lr.ph:                                           ; preds = %entry, %.lr.ph
281  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
282  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
283  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
284  %l3 = load i32, ptr %l2, align 4
; Two chained adds: accumulator + A[i], then + the constant 3.
285  %l7 = add i32 %sum.02, %l3
286  %l9 = add i32 %l7, 3
287  %indvars.iv.next = add i64 %indvars.iv, 1
; The incremented 64-bit counter is truncated to i32 and compared with 257,
; so the loop exits after the 257th iteration.
288  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
289  %exitcond = icmp eq i32 %lftr.wideiv, 257
290  br i1 %exitcond, label %._crit_edge, label %.lr.ph
291
292._crit_edge:                                      ; preds = %.lr.ph
293  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
294  ret i32 %sum.0.lcssa
295}
296
; Test: a chain of three multiplies feeding one in-loop mul reduction, with a
; folded (predicated) tail.
; Input: a scalar loop that multiplies an i32 accumulator (starting at 1) by
; the truncated induction variable, A[i] and B[i]; the loop body executes
; 257 times.
; Expected output (autogenerated assertions below): both loads are emitted
; per lane under the `icmp ult ..., 257` lane mask (pred.load.if* blocks);
; each of the three factors is masked with a select whose inactive lanes are
; a splat of 1 (so they do not change the product), reduced with its own
; llvm.vector.reduce.mul call, and chained onto the scalar accumulator.
; The vector loop exits once the index reaches 260.
297define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
298; CHECK-LABEL: @reduction_prod(
299; CHECK-NEXT:  entry:
300; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
301; CHECK:       vector.ph:
302; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
303; CHECK:       vector.body:
304; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
305; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
306; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE8]] ]
307; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
308; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
309; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
310; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
311; CHECK:       pred.load.if:
312; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
313; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
314; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
315; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
316; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
317; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
318; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
319; CHECK:       pred.load.continue:
320; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
321; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
322; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
323; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
324; CHECK:       pred.load.if3:
325; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
326; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
327; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
328; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
329; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
330; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
331; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
332; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
333; CHECK:       pred.load.continue4:
334; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
335; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
336; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
337; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
338; CHECK:       pred.load.if5:
339; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
340; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
341; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
342; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
343; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
344; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
345; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
346; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
347; CHECK:       pred.load.continue6:
348; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
349; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
350; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
351; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
352; CHECK:       pred.load.if7:
353; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
354; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
355; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
356; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
357; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
358; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
359; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
360; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
361; CHECK:       pred.load.continue8:
362; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
363; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
364; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> splat (i32 1)
365; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]])
366; CHECK-NEXT:    [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]]
367; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1)
368; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
369; CHECK-NEXT:    [[TMP45:%.*]] = mul i32 [[TMP44]], [[TMP42]]
370; CHECK-NEXT:    [[TMP46:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1)
371; CHECK-NEXT:    [[TMP47:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP46]])
372; CHECK-NEXT:    [[TMP48]] = mul i32 [[TMP47]], [[TMP45]]
373; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
374; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
375; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
376; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
377; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
378; CHECK:       middle.block:
379; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
380; CHECK:       scalar.ph:
381; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
382; CHECK:       .lr.ph:
383; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]]
384; CHECK:       ._crit_edge:
385; CHECK-NEXT:    [[PROD_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ]
386; CHECK-NEXT:    ret i32 [[PROD_0_LCSSA]]
387;
; Scalar input loop follows: prod *= (i32)i * A[i] * B[i].
388entry:
389  br label %.lr.ph
390
391.lr.ph:                                           ; preds = %entry, %.lr.ph
392  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
393  %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
394  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
395  %l3 = load i32, ptr %l2, align 4
396  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
397  %l5 = load i32, ptr %l4, align 4
; The induction variable itself is one of the factors, narrowed to i32.
398  %l6 = trunc i64 %indvars.iv to i32
; Three chained multiplies: accumulator * index, then * A[i], then * B[i].
399  %l7 = mul i32 %prod.02, %l6
400  %l8 = mul i32 %l7, %l3
401  %l9 = mul i32 %l8, %l5
402  %indvars.iv.next = add i64 %indvars.iv, 1
; The incremented 64-bit counter is truncated to i32 and compared with 257,
; so the loop exits after the 257th iteration.
403  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
404  %exitcond = icmp eq i32 %lftr.wideiv, 257
405  br i1 %exitcond, label %._crit_edge, label %.lr.ph
406
407._crit_edge:                                      ; preds = %.lr.ph
408  %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
409  ret i32 %prod.0.lcssa
410}
411
412define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
413; CHECK-LABEL: @reduction_mix(
414; CHECK-NEXT:  entry:
415; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
416; CHECK:       vector.ph:
417; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
418; CHECK:       vector.body:
419; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE8:%.*]] ]
420; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE8]] ]
421; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_LOAD_CONTINUE8]] ]
422; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_LOAD_CONTINUE8]] ]
423; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
424; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
425; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
426; CHECK:       pred.load.if:
427; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
428; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
429; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
430; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
431; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
432; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
433; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
434; CHECK:       pred.load.continue:
435; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
436; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
437; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
438; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
439; CHECK:       pred.load.if3:
440; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
441; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
442; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
443; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
444; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
445; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
446; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
447; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
448; CHECK:       pred.load.continue4:
449; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF3]] ]
450; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF3]] ]
451; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
452; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
453; CHECK:       pred.load.if5:
454; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
455; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
456; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
457; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
458; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
459; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
460; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
461; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
462; CHECK:       pred.load.continue6:
463; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ]
464; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
465; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
466; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8]]
467; CHECK:       pred.load.if7:
468; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
469; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
470; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
471; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
472; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
473; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
474; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
475; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE8]]
476; CHECK:       pred.load.continue8:
477; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP34]], [[PRED_LOAD_IF7]] ]
478; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE6]] ], [ [[TMP37]], [[PRED_LOAD_IF7]] ]
479; CHECK-NEXT:    [[TMP40:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP38]]
480; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[VEC_IND1]], <4 x i32> zeroinitializer
481; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP41]])
482; CHECK-NEXT:    [[TMP43:%.*]] = add i32 [[TMP42]], [[VEC_PHI]]
483; CHECK-NEXT:    [[TMP44:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
484; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP44]])
485; CHECK-NEXT:    [[TMP46]] = add i32 [[TMP45]], [[TMP43]]
486; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
487; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
488; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <4 x i32> [[VEC_IND1]], splat (i32 4)
489; CHECK-NEXT:    [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
490; CHECK-NEXT:    br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
491; CHECK:       middle.block:
492; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
493; CHECK:       scalar.ph:
494; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
495; CHECK:       .lr.ph:
496; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP11:![0-9]+]]
497; CHECK:       ._crit_edge:
498; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP46]], [[MIDDLE_BLOCK]] ]
499; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
500;
501entry:
502  br label %.lr.ph
503
504.lr.ph:                                           ; preds = %entry, %.lr.ph
505  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
506  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
507  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
508  %l3 = load i32, ptr %l2, align 4
509  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
510  %l5 = load i32, ptr %l4, align 4
511  %l6 = mul nsw i32 %l5, %l3
512  %l7 = trunc i64 %indvars.iv to i32
513  %l8 = add i32 %sum.02, %l7
514  %l9 = add i32 %l8, %l6
515  %indvars.iv.next = add i64 %indvars.iv, 1
516  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
517  %exitcond = icmp eq i32 %lftr.wideiv, 257
518  br i1 %exitcond, label %._crit_edge, label %.lr.ph
519
520._crit_edge:                                      ; preds = %.lr.ph
521  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
522  ret i32 %sum.0.lcssa
523}
524
; reduction_mul: integer product reduction over two arrays.
; The scalar loop at the bottom seeds the accumulator with 19 and multiplies
; it by A[i] and then by B[i] on each of its 257 iterations (i = 0..256).
; The autogenerated assertions expect a predicated 4-wide vector loop: lanes
; are enabled by `icmp ult <iv>, 257`, each lane's loads live in their own
; pred.load.if block, disabled lanes are replaced by 1 (the multiplicative
; identity) before each llvm.vector.reduce.mul call, and both reduced values
; are folded into a scalar accumulator phi inside the loop. The vector loop
; is expected to stop at index 260 and middle.block to branch straight to
; the exit.
525define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
526; CHECK-LABEL: @reduction_mul(
527; CHECK-NEXT:  entry:
528; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
529; CHECK:       vector.ph:
530; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
531; CHECK:       vector.body:
532; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
533; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
534; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
535; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
536; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
537; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
538; CHECK:       pred.load.if:
539; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
540; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
541; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
542; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
543; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
544; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
545; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
546; CHECK:       pred.load.continue:
547; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
548; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
549; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
550; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
551; CHECK:       pred.load.if1:
552; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
553; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
554; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
555; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
556; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
557; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
558; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
559; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
560; CHECK:       pred.load.continue2:
561; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
562; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
563; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
564; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
565; CHECK:       pred.load.if3:
566; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
567; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
568; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
569; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
570; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
571; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
572; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
573; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
574; CHECK:       pred.load.continue4:
575; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
576; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
577; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
578; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
579; CHECK:       pred.load.if5:
580; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
581; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
582; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
583; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
584; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
585; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
586; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
587; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
588; CHECK:       pred.load.continue6:
589; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
590; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
591; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 1)
592; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP40]])
593; CHECK-NEXT:    [[TMP42:%.*]] = mul i32 [[TMP41]], [[VEC_PHI]]
594; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 1)
595; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP43]])
596; CHECK-NEXT:    [[TMP45]] = mul i32 [[TMP44]], [[TMP42]]
597; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
598; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
599; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
600; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
601; CHECK:       middle.block:
602; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
603; CHECK:       scalar.ph:
604; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
605; CHECK:       .lr.ph:
606; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP13:![0-9]+]]
607; CHECK:       ._crit_edge:
608; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
609; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
610;
611entry:
612  br label %.lr.ph
613
614.lr.ph:                                           ; preds = %entry, %.lr.ph
615  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  ; Running product; seeded with 19 on entry.
616  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ]
617  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
618  %l3 = load i32, ptr %l2, align 4
619  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
620  %l5 = load i32, ptr %l4, align 4
  ; Two chained multiplies per iteration: acc * A[i], then * B[i].
621  %l6 = mul i32 %sum.02, %l3
622  %l7 = mul i32 %l6, %l5
623  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Leave the loop once the truncated next index reaches 257.
624  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
625  %exitcond = icmp eq i32 %lftr.wideiv, 257
626  br i1 %exitcond, label %._crit_edge, label %.lr.ph
627
628._crit_edge:                                      ; preds = %.lr.ph
629  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
630  ret i32 %sum.0.lcssa
631}
632
; reduction_and: bitwise-AND reduction over two arrays.
; The scalar loop at the bottom seeds the accumulator with -1 (all ones) and
; ANDs it with A[i] and then with B[i] on each of its 257 iterations
; (i = 0..256). Unlike reduction_mul, the pointer arguments are not noalias.
; The autogenerated assertions expect a predicated 4-wide vector loop: lanes
; are enabled by `icmp ult <iv>, 257`, each lane's loads live in their own
; pred.load.if block, disabled lanes are replaced by -1 (the AND identity)
; before each llvm.vector.reduce.and call, and both reduced values are
; folded into a scalar accumulator phi inside the loop.
633define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
634; CHECK-LABEL: @reduction_and(
635; CHECK-NEXT:  entry:
636; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
637; CHECK:       vector.ph:
638; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
639; CHECK:       vector.body:
640; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
641; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
642; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
643; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
644; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
645; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
646; CHECK:       pred.load.if:
647; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
648; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
649; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
650; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
651; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
652; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
653; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
654; CHECK:       pred.load.continue:
655; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
656; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
657; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
658; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
659; CHECK:       pred.load.if1:
660; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
661; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
662; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
663; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
664; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
665; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
666; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
667; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
668; CHECK:       pred.load.continue2:
669; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
670; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
671; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
672; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
673; CHECK:       pred.load.if3:
674; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
675; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
676; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
677; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
678; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
679; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
680; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
681; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
682; CHECK:       pred.load.continue4:
683; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
684; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
685; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
686; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
687; CHECK:       pred.load.if5:
688; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
689; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
690; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
691; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
692; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
693; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
694; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
695; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
696; CHECK:       pred.load.continue6:
697; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
698; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
699; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP38]], <4 x i32> splat (i32 -1)
700; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP40]])
701; CHECK-NEXT:    [[TMP42:%.*]] = and i32 [[TMP41]], [[VEC_PHI]]
702; CHECK-NEXT:    [[TMP43:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP39]], <4 x i32> splat (i32 -1)
703; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP43]])
704; CHECK-NEXT:    [[TMP45]] = and i32 [[TMP44]], [[TMP42]]
705; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
706; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
707; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
708; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
709; CHECK:       middle.block:
710; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
711; CHECK:       scalar.ph:
712; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
713; CHECK:       for.body:
714; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
715; CHECK:       for.end:
716; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
717; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
718;
719entry:
720  br label %for.body
721
722for.body:                                         ; preds = %entry, %for.body
723  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Running AND; seeded with -1 (all bits set) on entry.
724  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
725  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
726  %l0 = load i32, ptr %arrayidx, align 4
727  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
728  %l1 = load i32, ptr %arrayidx2, align 4
  ; Despite its name, %add is an `and`: acc & A[i]; %and then folds in B[i].
729  %add = and i32 %result.08, %l0
730  %and = and i32 %add, %l1
731  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Leave the loop once the truncated next index reaches 257.
732  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
733  %exitcond = icmp eq i32 %lftr.wideiv, 257
734  br i1 %exitcond, label %for.end, label %for.body
735
736for.end:                                          ; preds = %for.body
737  %result.0.lcssa = phi i32 [ %and, %for.body ]
738  ret i32 %result.0.lcssa
739}
740
; reduction_or: bitwise-OR reduction of the element-wise sum of two arrays.
; The scalar loop at the bottom seeds the accumulator with 0 and ORs in
; (B[i] + A[i]) on each of its 257 iterations (i = 0..256).
; The autogenerated assertions expect a predicated 4-wide vector loop: lanes
; are enabled by `icmp ult <iv>, 257`, each lane's loads live in their own
; pred.load.if block, the two loaded vectors are added, disabled lanes are
; replaced by 0 (the OR identity), and a single llvm.vector.reduce.or result
; is folded into a scalar accumulator phi inside the loop.
741define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
742; CHECK-LABEL: @reduction_or(
743; CHECK-NEXT:  entry:
744; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
745; CHECK:       vector.ph:
746; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
747; CHECK:       vector.body:
748; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
749; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
750; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
751; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
752; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
753; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
754; CHECK:       pred.load.if:
755; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
756; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
757; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
758; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
759; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
760; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
761; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
762; CHECK:       pred.load.continue:
763; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
764; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
765; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
766; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
767; CHECK:       pred.load.if1:
768; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
769; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
770; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
771; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
772; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
773; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
774; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
775; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
776; CHECK:       pred.load.continue2:
777; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
778; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
779; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
780; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
781; CHECK:       pred.load.if3:
782; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
783; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
784; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
785; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
786; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
787; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
788; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
789; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
790; CHECK:       pred.load.continue4:
791; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
792; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
793; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
794; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
795; CHECK:       pred.load.if5:
796; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
797; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
798; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
799; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
800; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
801; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
802; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
803; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
804; CHECK:       pred.load.continue6:
805; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
806; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
807; CHECK-NEXT:    [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]]
808; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
809; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP41]])
810; CHECK-NEXT:    [[TMP43]] = or i32 [[TMP42]], [[VEC_PHI]]
811; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
812; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
813; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
814; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
815; CHECK:       middle.block:
816; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
817; CHECK:       scalar.ph:
818; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
819; CHECK:       for.body:
820; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
821; CHECK:       for.end:
822; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ]
823; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
824;
825entry:
826  br label %for.body
827
828for.body:                                         ; preds = %entry, %for.body
829  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Running OR; seeded with 0 on entry.
830  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
831  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
832  %l0 = load i32, ptr %arrayidx, align 4
833  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
834  %l1 = load i32, ptr %arrayidx2, align 4
  ; Only the OR is the reduction; the add just forms the per-element value.
835  %add = add nsw i32 %l1, %l0
836  %or = or i32 %add, %result.08
837  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Leave the loop once the truncated next index reaches 257.
838  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
839  %exitcond = icmp eq i32 %lftr.wideiv, 257
840  br i1 %exitcond, label %for.end, label %for.body
841
842for.end:                                          ; preds = %for.body
843  %result.0.lcssa = phi i32 [ %or, %for.body ]
844  ret i32 %result.0.lcssa
845}
846
; reduction_xor: bitwise-XOR reduction of the element-wise sum of two arrays.
; The scalar loop at the bottom seeds the accumulator with 0 and XORs in
; (B[i] + A[i]) on each of its 257 iterations (i = 0..256).
; The autogenerated assertions expect a predicated 4-wide vector loop: lanes
; are enabled by `icmp ult <iv>, 257`, each lane's loads live in their own
; pred.load.if block, the two loaded vectors are added, disabled lanes are
; replaced by 0 (the XOR identity), and a single llvm.vector.reduce.xor
; result is folded into a scalar accumulator phi inside the loop.
847define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
848; CHECK-LABEL: @reduction_xor(
849; CHECK-NEXT:  entry:
850; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
851; CHECK:       vector.ph:
852; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
853; CHECK:       vector.body:
854; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
855; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
856; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
857; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
858; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
859; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
860; CHECK:       pred.load.if:
861; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
862; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
863; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
864; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
865; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
866; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
867; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
868; CHECK:       pred.load.continue:
869; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
870; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
871; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
872; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
873; CHECK:       pred.load.if1:
874; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
875; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP11]]
876; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
877; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
878; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP11]]
879; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
880; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
881; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
882; CHECK:       pred.load.continue2:
883; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
884; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
885; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
886; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
887; CHECK:       pred.load.if3:
888; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
889; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP21]]
890; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
891; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
892; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]]
893; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
894; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
895; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
896; CHECK:       pred.load.continue4:
897; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
898; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
899; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
900; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
901; CHECK:       pred.load.if5:
902; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
903; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP31]]
904; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
905; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
906; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP31]]
907; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
908; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
909; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
910; CHECK:       pred.load.continue6:
911; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
912; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
913; CHECK-NEXT:    [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]]
914; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
915; CHECK-NEXT:    [[TMP42:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP41]])
916; CHECK-NEXT:    [[TMP43]] = xor i32 [[TMP42]], [[VEC_PHI]]
917; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
918; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
919; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
920; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
921; CHECK:       middle.block:
922; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
923; CHECK:       scalar.ph:
924; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
925; CHECK:       for.body:
926; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
927; CHECK:       for.end:
928; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ]
929; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
930;
931entry:
932  br label %for.body
933
934for.body:                                         ; preds = %entry, %for.body
935  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  ; Running XOR; seeded with 0 on entry.
936  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
937  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
938  %l0 = load i32, ptr %arrayidx, align 4
939  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
940  %l1 = load i32, ptr %arrayidx2, align 4
  ; Only the XOR is the reduction; the add just forms the per-element value.
941  %add = add nsw i32 %l1, %l0
942  %xor = xor i32 %add, %result.08
943  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Leave the loop once the truncated next index reaches 257.
944  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
945  %exitcond = icmp eq i32 %lftr.wideiv, 257
946  br i1 %exitcond, label %for.end, label %for.body
947
948for.end:                                          ; preds = %for.body
949  %result.0.lcssa = phi i32 [ %xor, %for.body ]
950  ret i32 %result.0.lcssa
951}
952
; In-loop fadd reduction under tail folding. Each iteration adds A[i] and then
; B[i] to the running sum with fast-math fadd. The loop exits once the
; truncated next index equals 257, i.e. after 257 iterations, which is not a
; multiple of the forced vector width of 4.
; Expected output: lanes whose index is not below 257 are masked off, every
; load is predicated per lane, inactive lanes are replaced by 0.0 through a
; select, and two chained llvm.vector.reduce.fadd calls update the scalar phi.
953define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
954; CHECK-LABEL: @reduction_fadd(
955; CHECK-NEXT:  entry:
956; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
957; CHECK:       vector.ph:
958; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
959; CHECK:       vector.body:
960; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
961; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
962; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
963; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
964; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
965; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
966; CHECK:       pred.load.if:
967; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
968; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
969; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
970; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
971; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
972; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
973; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
974; CHECK:       pred.load.continue:
975; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
976; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
977; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
978; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
979; CHECK:       pred.load.if1:
980; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
981; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
982; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
983; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
984; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
985; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
986; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
987; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
988; CHECK:       pred.load.continue2:
989; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
990; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
991; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
992; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
993; CHECK:       pred.load.if3:
994; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
995; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
996; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
997; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
998; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]]
999; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
1000; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
1001; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1002; CHECK:       pred.load.continue4:
1003; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
1004; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
1005; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
1006; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1007; CHECK:       pred.load.if5:
1008; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
1009; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
1010; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
1011; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
1012; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]]
1013; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
1014; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
1015; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1016; CHECK:       pred.load.continue6:
1017; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
1018; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
1019; CHECK-NEXT:    [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> zeroinitializer
1020; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP40]])
1021; CHECK-NEXT:    [[TMP42:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> zeroinitializer
1022; CHECK-NEXT:    [[TMP43]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP41]], <4 x float> [[TMP42]])
1023; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1024; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
1025; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1026; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1027; CHECK:       middle.block:
1028; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1029; CHECK:       scalar.ph:
1030; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1031; CHECK:       for.body:
1032; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
1033; CHECK:       for.end:
1034; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ]
1035; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
1036;
1037entry:
1038  br label %for.body
1039
1040for.body:                                         ; preds = %entry, %for.body
1041  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
; Running sum; starts at 0.0 and is carried around the loop via %fadd.
1042  %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
1043  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
1044  %l0 = load float, ptr %arrayidx, align 4
1045  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
1046  %l1 = load float, ptr %arrayidx2, align 4
; Reduction chain of two fast-math adds: (sum + A[i]) + B[i].
1047  %add = fadd fast float %result.08, %l0
1048  %fadd = fadd fast float %add, %l1
1049  %indvars.iv.next = add i64 %indvars.iv, 1
; Exit test is performed on the i32 truncation of the incremented index.
1050  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1051  %exitcond = icmp eq i32 %lftr.wideiv, 257
1052  br i1 %exitcond, label %for.end, label %for.body
1053
1054for.end:                                          ; preds = %for.body
1055  %result.0.lcssa = phi float [ %fadd, %for.body ]
1056  ret float %result.0.lcssa
1057}
1058
; In-loop fmul reduction under tail folding. Each iteration multiplies the
; running product by A[i] and then by B[i] with fast-math fmul. The accumulator
; phi starts at 0.0, and the loop runs 257 iterations (exit when the truncated
; next index equals 257).
; Expected output: per-lane predicated loads guarded by an icmp ult mask
; against 257, inactive lanes replaced by 1.0 through a select, and for each
; operand an llvm.vector.reduce.fmul (start value 1.0) whose result is
; multiplied into the scalar accumulator.
1059define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
1060; CHECK-LABEL: @reduction_fmul(
1061; CHECK-NEXT:  entry:
1062; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1063; CHECK:       vector.ph:
1064; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1065; CHECK:       vector.body:
1066; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1067; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1068; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1069; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
1070; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
1071; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1072; CHECK:       pred.load.if:
1073; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
1074; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
1075; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
1076; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
1077; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
1078; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
1079; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1080; CHECK:       pred.load.continue:
1081; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
1082; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
1083; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
1084; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1085; CHECK:       pred.load.if1:
1086; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i64 [[INDEX]], 1
1087; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
1088; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
1089; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
1090; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP11]]
1091; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
1092; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
1093; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1094; CHECK:       pred.load.continue2:
1095; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
1096; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
1097; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
1098; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1099; CHECK:       pred.load.if3:
1100; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i64 [[INDEX]], 2
1101; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]]
1102; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
1103; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
1104; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP21]]
1105; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
1106; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
1107; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1108; CHECK:       pred.load.continue4:
1109; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
1110; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
1111; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
1112; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1113; CHECK:       pred.load.if5:
1114; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i64 [[INDEX]], 3
1115; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]]
1116; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
1117; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
1118; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP31]]
1119; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
1120; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
1121; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1122; CHECK:       pred.load.continue6:
1123; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
1124; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
1125; CHECK-NEXT:    [[TMP40:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP38]], <4 x float> splat (float 1.000000e+00)
1126; CHECK-NEXT:    [[TMP41:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP40]])
1127; CHECK-NEXT:    [[TMP42:%.*]] = fmul fast float [[TMP41]], [[VEC_PHI]]
1128; CHECK-NEXT:    [[TMP43:%.*]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP39]], <4 x float> splat (float 1.000000e+00)
1129; CHECK-NEXT:    [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP43]])
1130; CHECK-NEXT:    [[TMP45]] = fmul fast float [[TMP44]], [[TMP42]]
1131; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1132; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
1133; CHECK-NEXT:    [[TMP46:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1134; CHECK-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1135; CHECK:       middle.block:
1136; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1137; CHECK:       scalar.ph:
1138; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1139; CHECK:       for.body:
1140; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1141; CHECK:       for.end:
1142; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
1143; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
1144;
1145entry:
1146  br label %for.body
1147
1148for.body:                                         ; preds = %entry, %for.body
1149  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
; Running product; note that the incoming value from %entry is 0.0.
1150  %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
1151  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
1152  %l0 = load float, ptr %arrayidx, align 4
1153  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
1154  %l1 = load float, ptr %arrayidx2, align 4
; Reduction chain of two fast-math multiplies: (prod * A[i]) * B[i].
; Despite its name, %add is an fmul.
1155  %add = fmul fast float %result.08, %l0
1156  %fmul = fmul fast float %add, %l1
1157  %indvars.iv.next = add i64 %indvars.iv, 1
; Exit test is performed on the i32 truncation of the incremented index.
1158  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1159  %exitcond = icmp eq i32 %lftr.wideiv, 257
1160  br i1 %exitcond, label %for.end, label %for.body
1161
1162for.end:                                          ; preds = %for.body
1163  %result.0.lcssa = phi float [ %fmul, %for.body ]
1164  ret float %result.0.lcssa
1165}
1166
; In-loop signed-minimum reduction under tail folding. Each iteration keeps the
; smaller of the running value and A[i], written as icmp slt + select; the
; running value starts at 1000. %B is declared but not used by the loop body.
; The loop runs 257 iterations (exit when the truncated next index equals 257).
; Expected output: per-lane predicated loads guarded by an icmp ult mask
; against 257, inactive lanes replaced by 2147483647 through a select, then
; llvm.vector.reduce.smin combined with the scalar phi via llvm.smin.
1167define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
1168; CHECK-LABEL: @reduction_min(
1169; CHECK-NEXT:  entry:
1170; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1171; CHECK:       vector.ph:
1172; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1173; CHECK:       vector.body:
1174; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1175; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1176; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1177; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
1178; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
1179; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1180; CHECK:       pred.load.if:
1181; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
1182; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
1183; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
1184; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1185; CHECK:       pred.load.continue:
1186; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
1187; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
1188; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1189; CHECK:       pred.load.if1:
1190; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1
1191; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
1192; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
1193; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
1194; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1195; CHECK:       pred.load.continue2:
1196; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
1197; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
1198; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1199; CHECK:       pred.load.if3:
1200; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 2
1201; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
1202; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
1203; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
1204; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1205; CHECK:       pred.load.continue4:
1206; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
1207; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
1208; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1209; CHECK:       pred.load.if5:
1210; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 3
1211; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
1212; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
1213; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
1214; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1215; CHECK:       pred.load.continue6:
1216; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
1217; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> splat (i32 2147483647)
1218; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP24]])
1219; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
1220; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1221; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
1222; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1223; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
1224; CHECK:       middle.block:
1225; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1226; CHECK:       scalar.ph:
1227; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1228; CHECK:       for.body:
1229; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
1230; CHECK:       for.end:
1231; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
1232; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
1233;
1234entry:
1235  br label %for.body
1236
1237for.body:                                         ; preds = %entry, %for.body
1238  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
; Running minimum; starts at 1000 and is carried around the loop via %v0.
1239  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
1240  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
1241  %l0 = load i32, ptr %arrayidx, align 4
; Signed min idiom: keep the running value when it is less than A[i].
1242  %c0 = icmp slt i32 %result.08, %l0
1243  %v0 = select i1 %c0, i32 %result.08, i32 %l0
1244  %indvars.iv.next = add i64 %indvars.iv, 1
; Exit test is performed on the i32 truncation of the incremented index.
1245  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1246  %exitcond = icmp eq i32 %lftr.wideiv, 257
1247  br i1 %exitcond, label %for.end, label %for.body
1248
1249for.end:                                          ; preds = %for.body
1250  %result.0.lcssa = phi i32 [ %v0, %for.body ]
1251  ret i32 %result.0.lcssa
1252}
1253
; In-loop unsigned-maximum reduction under tail folding. Each iteration keeps
; the larger of the running value and A[i], written as icmp ugt + select; the
; running value starts at 1000. %B is declared but not used by the loop body.
; The loop runs 257 iterations (exit when the truncated next index equals 257).
; Expected output: per-lane predicated loads guarded by an icmp ult mask
; against 257, inactive lanes replaced by 0 through a select, then
; llvm.vector.reduce.umax combined with the scalar phi via llvm.umax.
1254define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
1255; CHECK-LABEL: @reduction_max(
1256; CHECK-NEXT:  entry:
1257; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1258; CHECK:       vector.ph:
1259; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1260; CHECK:       vector.body:
1261; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1262; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1263; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1000, [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1264; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i64> [[VEC_IND]], splat (i64 257)
1265; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
1266; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1267; CHECK:       pred.load.if:
1268; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
1269; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
1270; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
1271; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1272; CHECK:       pred.load.continue:
1273; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
1274; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
1275; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1276; CHECK:       pred.load.if1:
1277; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i64 [[INDEX]], 1
1278; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP7]]
1279; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
1280; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
1281; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1282; CHECK:       pred.load.continue2:
1283; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
1284; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
1285; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1286; CHECK:       pred.load.if3:
1287; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i64 [[INDEX]], 2
1288; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
1289; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
1290; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
1291; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1292; CHECK:       pred.load.continue4:
1293; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
1294; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
1295; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1296; CHECK:       pred.load.if5:
1297; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i64 [[INDEX]], 3
1298; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP19]]
1299; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
1300; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
1301; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1302; CHECK:       pred.load.continue6:
1303; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
1304; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
1305; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP24]])
1306; CHECK-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP25]], i32 [[VEC_PHI]])
1307; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1308; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
1309; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 260
1310; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1311; CHECK:       middle.block:
1312; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1313; CHECK:       scalar.ph:
1314; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1315; CHECK:       for.body:
1316; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
1317; CHECK:       for.end:
1318; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
1319; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
1320;
1321entry:
1322  br label %for.body
1323
1324for.body:                                         ; preds = %entry, %for.body
1325  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
; Running maximum; starts at 1000 and is carried around the loop via %v0.
1326  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
1327  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
1328  %l0 = load i32, ptr %arrayidx, align 4
; Unsigned max idiom (ugt, not sgt): keep the running value when it is
; greater than A[i].
1329  %c0 = icmp ugt i32 %result.08, %l0
1330  %v0 = select i1 %c0, i32 %result.08, i32 %l0
1331  %indvars.iv.next = add i64 %indvars.iv, 1
; Exit test is performed on the i32 truncation of the incremented index.
1332  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1333  %exitcond = icmp eq i32 %lftr.wideiv, 257
1334  br i1 %exitcond, label %for.end, label %for.body
1335
1336for.end:                                          ; preds = %for.body
1337  %result.0.lcssa = phi i32 [ %v0, %for.body ]
1338  ret i32 %result.0.lcssa
1339}
1340
1341; Conditional reductions with multi-input phis.
; Scalar loop, 128 iterations, running sum starts at %S:
;   if (A[i] > B[i]) {
;     if (B[i] > 1.0)       sum += A[i];
;     else if (A[i] > 2.0)  sum += B[i];
;   }
; %C is declared but not used by the loop body.
; Expected output: unlike the tail-folded functions in this file there are no
; predicated load blocks and no lane mask; the body uses wide loads, the
; branches are flattened into selects over a <4 x float> accumulator phi, and
; a single llvm.vector.reduce.fadd appears in middle.block.
1342define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
1343; CHECK-LABEL: @reduction_conditional(
1344; CHECK-NEXT:  entry:
1345; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1346; CHECK:       vector.ph:
1347; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[S:%.*]], i64 0
1348; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1349; CHECK:       vector.body:
1350; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1351; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
1352; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
1353; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
1354; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
1355; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
1356; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
1357; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
1358; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
1359; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
1360; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
1361; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
1362; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
1363; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
1364; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
1365; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
1366; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
1367; CHECK-NEXT:    [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
1368; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1369; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
1370; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
1371; CHECK:       middle.block:
1372; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]])
1373; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1374; CHECK:       scalar.ph:
1375; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1376; CHECK:       for.body:
1377; CHECK-NEXT:    br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
1378; CHECK:       if.then:
1379; CHECK-NEXT:    br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]]
1380; CHECK:       if.then8:
1381; CHECK-NEXT:    br label [[FOR_INC]]
1382; CHECK:       if.else:
1383; CHECK-NEXT:    br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]]
1384; CHECK:       if.then16:
1385; CHECK-NEXT:    br label [[FOR_INC]]
1386; CHECK:       for.inc:
1387; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP29:![0-9]+]]
1388; CHECK:       for.end:
1389; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
1390; CHECK-NEXT:    ret float [[SUM_1_LCSSA]]
1391;
1392entry:
1393  br label %for.body
1394
1395for.body:
1396  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
; Running sum; starts at %S and is updated through the four-input phi in
; for.inc.
1397  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
1398  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
1399  %l0 = load float, ptr %arrayidx, align 4
1400  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
1401  %l1 = load float, ptr %arrayidx2, align 4
; Outer guard: only elements with A[i] > B[i] can contribute.
1402  %cmp3 = fcmp ogt float %l0, %l1
1403  br i1 %cmp3, label %if.then, label %for.inc
1404
1405if.then:
1406  %cmp6 = fcmp ogt float %l1, 1.000000e+00
1407  br i1 %cmp6, label %if.then8, label %if.else
1408
; B[i] > 1.0: accumulate A[i].
1409if.then8:
1410  %add = fadd fast float %sum.033, %l0
1411  br label %for.inc
1412
1413if.else:
1414  %cmp14 = fcmp ogt float %l0, 2.000000e+00
1415  br i1 %cmp14, label %if.then16, label %for.inc
1416
; B[i] is not greater than 1.0 and A[i] > 2.0: accumulate B[i].
1417if.then16:
1418  %add19 = fadd fast float %sum.033, %l1
1419  br label %for.inc
1420
1421for.inc:
; Multi-input reduction phi: two updated sums plus two pass-through edges that
; leave the running sum unchanged.
1422  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]
1423  %indvars.iv.next = add i64 %indvars.iv, 1
; Loop continues while the i32 truncation of the incremented index differs
; from 128.
1424  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1425  %exitcond = icmp ne i32 %lftr.wideiv, 128
1426  br i1 %exitcond, label %for.body, label %for.end
1427
1428for.end:
1429  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
1430  ret float %sum.1.lcssa
1431}
1432
; Add reduction whose result is only used after truncation to i8, vectorized
; with tail folding (see the RUN line: forced width 4, predicate preferred
; over a scalar epilogue).
;
; Scalar input: the loop runs for %indvars.iv = 0..256 (it leaves once
; %indvars.iv.next equals 257). Each iteration masks the i32 running sum to
; its low 8 bits, adds a zero-extended i8 loaded from %A, and the function
; returns the low 8 bits of the final sum.
;
; Expected output (autogenerated assertions below): a <4 x i32> accumulator
; phi seeded with <255, 0, 0, 0>, a lane mask `icmp ult ..., splat (i32 257)`
; that guards four predicated scalar loads, a vector loop that leaves when the
; index reaches 260, and in middle.block a select on the lane mask, a trunc to
; <4 x i8> and a single llvm.vector.reduce.add.v4i8 call.
1433define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
1434; CHECK-LABEL: @reduction_add_trunc(
1435; CHECK-NEXT:  entry:
1436; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1437; CHECK:       vector.ph:
1438; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1439; CHECK:       vector.body:
1440; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1441; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1442; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1443; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
1444; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
1445; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
1446; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1447; CHECK:       pred.load.if:
1448; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[INDEX]] to i64
1449; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP3]]
1450; CHECK-NEXT:    [[TMP5:%.*]] = load i8, ptr [[TMP4]], align 4
1451; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i8> poison, i8 [[TMP5]], i64 0
1452; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1453; CHECK:       pred.load.continue:
1454; CHECK-NEXT:    [[TMP7:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ]
1455; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
1456; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1457; CHECK:       pred.load.if1:
1458; CHECK-NEXT:    [[TMP9:%.*]] = or disjoint i32 [[INDEX]], 1
1459; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
1460; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP10]]
1461; CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4
1462; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i8> [[TMP7]], i8 [[TMP12]], i64 1
1463; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1464; CHECK:       pred.load.continue2:
1465; CHECK-NEXT:    [[TMP14:%.*]] = phi <4 x i8> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ]
1466; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
1467; CHECK-NEXT:    br i1 [[TMP15]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1468; CHECK:       pred.load.if3:
1469; CHECK-NEXT:    [[TMP16:%.*]] = or disjoint i32 [[INDEX]], 2
1470; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
1471; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP17]]
1472; CHECK-NEXT:    [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 4
1473; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i8> [[TMP14]], i8 [[TMP19]], i64 2
1474; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1475; CHECK:       pred.load.continue4:
1476; CHECK-NEXT:    [[TMP21:%.*]] = phi <4 x i8> [ [[TMP14]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP20]], [[PRED_LOAD_IF3]] ]
1477; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
1478; CHECK-NEXT:    br i1 [[TMP22]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1479; CHECK:       pred.load.if5:
1480; CHECK-NEXT:    [[TMP23:%.*]] = or disjoint i32 [[INDEX]], 3
1481; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
1482; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP24]]
1483; CHECK-NEXT:    [[TMP26:%.*]] = load i8, ptr [[TMP25]], align 4
1484; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i8> [[TMP21]], i8 [[TMP26]], i64 3
1485; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1486; CHECK:       pred.load.continue6:
1487; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i8> [ [[TMP21]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], [[PRED_LOAD_IF5]] ]
1488; CHECK-NEXT:    [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
1489; CHECK-NEXT:    [[TMP30]] = add nuw nsw <4 x i32> [[TMP1]], [[TMP29]]
1490; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1491; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
1492; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
1493; CHECK-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
1494; CHECK:       middle.block:
1495; CHECK-NEXT:    [[TMP32:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP30]], <4 x i32> [[VEC_PHI]]
1496; CHECK-NEXT:    [[TMP33:%.*]] = trunc <4 x i32> [[TMP32]] to <4 x i8>
1497; CHECK-NEXT:    [[TMP34:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP33]])
1498; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
1499; CHECK:       scalar.ph:
1500; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
1501; CHECK:       .lr.ph:
1502; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP31:![0-9]+]]
1503; CHECK:       ._crit_edge:
1504; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
1505; CHECK-NEXT:    ret i8 [[SUM_0_LCSSA]]
1506;
1507entry:
1508  br label %.lr.ph
1509
1510.lr.ph:                                           ; preds = %entry, %.lr.ph
1511  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
; Running sum: starts at 255 and is fed back from %l9 on the back edge.
1512  %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
; Keep only the low 8 bits of the running sum before adding the next element.
1513  %sum.02 = and i32 %sum.02p, 255
1514  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
; NOTE(review): this i8 load is marked align 4 although the GEP above advances
; by one byte per iteration. Presumably incidental to what the test exercises;
; the expected output also matches align 4, so confirm and regenerate the
; assertions before changing it.
1515  %l3 = load i8, ptr %l2, align 4
1516  %l3e = zext i8 %l3 to i32
1517  %l9 = add i32 %sum.02, %l3e
1518  %indvars.iv.next = add i32 %indvars.iv, 1
; Leave the loop once 257 elements have been accumulated.
1519  %exitcond = icmp eq i32 %indvars.iv.next, 257
1520  br i1 %exitcond, label %._crit_edge, label %.lr.ph
1521
1522._crit_edge:                                      ; preds = %.lr.ph
1523  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
; Only the low byte of the i32 sum is returned.
1524  %ret = trunc i32 %sum.0.lcssa to i8
1525  ret i8 %ret
1526}
1527
1528
; Bitwise-and reduction whose result is only used after truncation to i8,
; vectorized with tail folding (see the RUN line: forced width 4, predicate
; preferred over a scalar epilogue).
;
; Scalar input: the loop runs for %indvars.iv = 0..256 (it leaves once
; %indvars.iv.next equals 257). Each iteration masks the i32 accumulator to
; its low 8 bits, ands it with a zero-extended i8 loaded from %A, and the
; function returns the low 8 bits of the final value.
;
; Expected output (autogenerated assertions below): a <4 x i32> accumulator
; phi seeded with <255, -1, -1, -1>, a lane mask `icmp ult ..., splat (i32 257)`
; that guards four predicated scalar loads, a vector loop that leaves when the
; index reaches 260, and in middle.block a select on the lane mask, a trunc to
; <4 x i8> and a single llvm.vector.reduce.and.v4i8 call. The input's
; `and ..., 255` mask does not show up as a separate instruction in
; vector.body; presumably it is folded away because the zero-extended i8
; operand has no bits set above the low byte.
1529define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
1530; CHECK-LABEL: @reduction_and_trunc(
1531; CHECK-NEXT:  entry:
1532; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1533; CHECK:       vector.ph:
1534; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1535; CHECK:       vector.body:
1536; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1537; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1538; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 255, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1539; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
1540; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
1541; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1542; CHECK:       pred.load.if:
1543; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[INDEX]] to i64
1544; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP2]]
1545; CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4
1546; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i8> poison, i8 [[TMP4]], i64 0
1547; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1548; CHECK:       pred.load.continue:
1549; CHECK-NEXT:    [[TMP6:%.*]] = phi <4 x i8> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_LOAD_IF]] ]
1550; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
1551; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1552; CHECK:       pred.load.if1:
1553; CHECK-NEXT:    [[TMP8:%.*]] = or disjoint i32 [[INDEX]], 1
1554; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
1555; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
1556; CHECK-NEXT:    [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 4
1557; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i8> [[TMP6]], i8 [[TMP11]], i64 1
1558; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1559; CHECK:       pred.load.continue2:
1560; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i8> [ [[TMP6]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ]
1561; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
1562; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1563; CHECK:       pred.load.if3:
1564; CHECK-NEXT:    [[TMP15:%.*]] = or disjoint i32 [[INDEX]], 2
1565; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
1566; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP16]]
1567; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[TMP17]], align 4
1568; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i8> [[TMP13]], i8 [[TMP18]], i64 2
1569; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1570; CHECK:       pred.load.continue4:
1571; CHECK-NEXT:    [[TMP20:%.*]] = phi <4 x i8> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ]
1572; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
1573; CHECK-NEXT:    br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1574; CHECK:       pred.load.if5:
1575; CHECK-NEXT:    [[TMP22:%.*]] = or disjoint i32 [[INDEX]], 3
1576; CHECK-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
1577; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP23]]
1578; CHECK-NEXT:    [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 4
1579; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i8> [[TMP20]], i8 [[TMP25]], i64 3
1580; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1581; CHECK:       pred.load.continue6:
1582; CHECK-NEXT:    [[TMP27:%.*]] = phi <4 x i8> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
1583; CHECK-NEXT:    [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
1584; CHECK-NEXT:    [[TMP29]] = and <4 x i32> [[VEC_PHI]], [[TMP28]]
1585; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1586; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
1587; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
1588; CHECK-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
1589; CHECK:       middle.block:
1590; CHECK-NEXT:    [[TMP31:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP29]], <4 x i32> [[VEC_PHI]]
1591; CHECK-NEXT:    [[TMP32:%.*]] = trunc <4 x i32> [[TMP31]] to <4 x i8>
1592; CHECK-NEXT:    [[TMP33:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP32]])
1593; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
1594; CHECK:       scalar.ph:
1595; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
1596; CHECK:       .lr.ph:
1597; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP33:![0-9]+]]
1598; CHECK:       ._crit_edge:
1599; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
1600; CHECK-NEXT:    ret i8 [[SUM_0_LCSSA]]
1601;
1602entry:
1603  br label %.lr.ph
1604
1605.lr.ph:                                           ; preds = %entry, %.lr.ph
1606  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
; Accumulator: starts at 255 and is fed back from %l9 on the back edge. The
; "sum" in the value names notwithstanding, the recurrence below is an `and`.
1607  %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
; Keep only the low 8 bits of the accumulator before combining.
1608  %sum.02 = and i32 %sum.02p, 255
1609  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
; NOTE(review): this i8 load is marked align 4 although the GEP above advances
; by one byte per iteration. Presumably incidental to what the test exercises;
; the expected output also matches align 4, so confirm and regenerate the
; assertions before changing it.
1610  %l3 = load i8, ptr %l2, align 4
1611  %l3e = zext i8 %l3 to i32
1612  %l9 = and i32 %sum.02, %l3e
1613  %indvars.iv.next = add i32 %indvars.iv, 1
; Leave the loop once 257 elements have been combined.
1614  %exitcond = icmp eq i32 %indvars.iv.next, 257
1615  br i1 %exitcond, label %._crit_edge, label %.lr.ph
1616
1617._crit_edge:                                      ; preds = %.lr.ph
1618  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
; Only the low byte of the i32 result is returned.
1619  %ret = trunc i32 %sum.0.lcssa to i8
1620  ret i8 %ret
1621}
1622