xref: /llvm-project/llvm/test/Transforms/LoopVectorize/reduction-predselect.ll (revision 82821254f532c1dbdfd5d985ef7130511efaaa83)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-width=4 -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -prefer-predicated-reduction-select -S | FileCheck %s
3
4target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
5
; Single-load add reduction: returns the sum of A[0] .. A[256], seeded with 0.
; The scalar loop runs 257 iterations, which is not a multiple of the forced
; vector width of 4. The expected output below therefore folds the tail into
; the vector loop: lanes are masked with `vec.ind u< 257`, each lane's load is
; emitted in its own predicated block, the loaded vector has its inactive lanes
; replaced by 0 through a select before being added to the accumulator, and the
; vector loop counts up to 260.
6define i32 @reduction_sum_single(ptr noalias nocapture %A) {
7; CHECK-LABEL: @reduction_sum_single(
8; CHECK-NEXT:  entry:
9; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
10; CHECK:       vector.ph:
11; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
12; CHECK:       vector.body:
13; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
14; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
15; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
16; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
17; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
18; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
19; CHECK:       pred.load.if:
20; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
21; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
22; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
23; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
24; CHECK:       pred.load.continue:
25; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
26; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
27; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
28; CHECK:       pred.load.if1:
29; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1
30; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP7]]
31; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
32; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
33; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
34; CHECK:       pred.load.continue2:
35; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
36; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
37; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
38; CHECK:       pred.load.if3:
39; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i32 [[INDEX]], 2
40; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]]
41; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
42; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
43; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
44; CHECK:       pred.load.continue4:
45; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
46; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
47; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
48; CHECK:       pred.load.if5:
49; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i32 [[INDEX]], 3
50; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP19]]
51; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
52; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
53; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
54; CHECK:       pred.load.continue6:
55; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
56; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP23]], <4 x i32> zeroinitializer
57; CHECK-NEXT:    [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
58; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
59; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
60; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
61; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
62; CHECK:       middle.block:
63; CHECK-NEXT:    [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
64; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
65; CHECK:       scalar.ph:
66; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
67; CHECK:       .lr.ph:
68; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
69; CHECK:       ._crit_edge:
70; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
71; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
72;
73entry:
74  br label %.lr.ph
75
76.lr.ph:                                           ; preds = %entry, %.lr.ph
  ; Induction variable and running sum; both start at 0 on entry.
77  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
78  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
  ; sum += A[i]
79  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
80  %l3 = load i32, ptr %l2, align 4
81  %l7 = add i32 %sum.02, %l3
82  %indvars.iv.next = add i32 %indvars.iv, 1
  ; Leave the loop once the incremented index equals 257 (257 iterations).
83  %exitcond = icmp eq i32 %indvars.iv.next, 257
84  br i1 %exitcond, label %._crit_edge, label %.lr.ph
85
86._crit_edge:                                      ; preds = %.lr.ph
  ; Single-input phi that carries the final sum out of the loop.
87  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
88  ret i32 %sum.0.lcssa
89}
90
; Add reduction over two arrays plus the induction variable: each of the 257
; iterations performs sum += i + A[i] + B[i], seeded with 0.
; In the expected output below both loads of a lane share one predicated block,
; the induction vector itself is added into the accumulator, and the tail mask
; (`vec.ind u< 257`) is applied by a select on the updated accumulator that
; keeps the previous accumulator value for inactive lanes.
91define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
92; CHECK-LABEL: @reduction_sum(
93; CHECK-NEXT:  entry:
94; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
95; CHECK:       vector.ph:
96; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
97; CHECK:       vector.body:
98; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
99; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
100; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_LOAD_CONTINUE6]] ]
101; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
102; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
103; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
104; CHECK:       pred.load.if:
105; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
106; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
107; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
108; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
109; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
110; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
111; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
112; CHECK:       pred.load.continue:
113; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
114; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
115; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
116; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
117; CHECK:       pred.load.if1:
118; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i32 [[INDEX]], 1
119; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
120; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
121; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
122; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
123; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
124; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
125; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
126; CHECK:       pred.load.continue2:
127; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
128; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
129; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
130; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
131; CHECK:       pred.load.if3:
132; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i32 [[INDEX]], 2
133; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
134; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
135; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
136; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
137; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
138; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
139; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
140; CHECK:       pred.load.continue4:
141; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
142; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
143; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
144; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
145; CHECK:       pred.load.if5:
146; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i32 [[INDEX]], 3
147; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
148; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
149; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
150; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
151; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
152; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
153; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
154; CHECK:       pred.load.continue6:
155; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
156; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
157; CHECK-NEXT:    [[TMP40:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND]]
158; CHECK-NEXT:    [[TMP41:%.*]] = add <4 x i32> [[TMP40]], [[TMP38]]
159; CHECK-NEXT:    [[TMP42:%.*]] = add <4 x i32> [[TMP41]], [[TMP39]]
160; CHECK-NEXT:    [[TMP43]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP42]], <4 x i32> [[VEC_PHI]]
161; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
162; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
163; CHECK-NEXT:    [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
164; CHECK-NEXT:    br i1 [[TMP44]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
165; CHECK:       middle.block:
166; CHECK-NEXT:    [[TMP45:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP43]])
167; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
168; CHECK:       scalar.ph:
169; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
170; CHECK:       .lr.ph:
171; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]]
172; CHECK:       ._crit_edge:
173; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP45]], [[MIDDLE_BLOCK]] ]
174; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
175;
176entry:
177  br label %.lr.ph
178
179.lr.ph:                                           ; preds = %entry, %.lr.ph
  ; Induction variable and running sum; both start at 0 on entry.
180  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
181  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  ; Load A[i] and B[i].
182  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
183  %l3 = load i32, ptr %l2, align 4
184  %l4 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
185  %l5 = load i32, ptr %l4, align 4
  ; sum = ((sum + i) + A[i]) + B[i]; the index itself is part of the reduction chain.
186  %l7 = add i32 %sum.02, %indvars.iv
187  %l8 = add i32 %l7, %l3
188  %l9 = add i32 %l8, %l5
189  %indvars.iv.next = add i32 %indvars.iv, 1
  ; Leave the loop once the incremented index equals 257 (257 iterations).
190  %exitcond = icmp eq i32 %indvars.iv.next, 257
191  br i1 %exitcond, label %._crit_edge, label %.lr.ph
192
193._crit_edge:                                      ; preds = %.lr.ph
  ; Single-input phi that carries the final sum out of the loop.
194  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
195  ret i32 %sum.0.lcssa
196}
197
; Multiply reduction over two arrays: each of the 257 iterations performs
; prod *= A[i] * B[i], seeded with 1.
; In the expected output below the vector accumulator starts as splat(1), the
; tail mask (`vec.ind u< 257`) is applied by a select that keeps the previous
; accumulator value for inactive lanes, and the final scalar is produced by
; llvm.vector.reduce.mul in the middle block.
198define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
199; CHECK-LABEL: @reduction_prod(
200; CHECK-NEXT:  entry:
201; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
202; CHECK:       vector.ph:
203; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
204; CHECK:       vector.body:
205; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
206; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
207; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
208; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
209; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
210; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
211; CHECK:       pred.load.if:
212; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
213; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
214; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
215; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
216; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
217; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
218; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
219; CHECK:       pred.load.continue:
220; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
221; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
222; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
223; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
224; CHECK:       pred.load.if1:
225; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i32 [[INDEX]], 1
226; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
227; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
228; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
229; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
230; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
231; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
232; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
233; CHECK:       pred.load.continue2:
234; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
235; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
236; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
237; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
238; CHECK:       pred.load.if3:
239; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i32 [[INDEX]], 2
240; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
241; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
242; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
243; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
244; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
245; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
246; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
247; CHECK:       pred.load.continue4:
248; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
249; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
250; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
251; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
252; CHECK:       pred.load.if5:
253; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i32 [[INDEX]], 3
254; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
255; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
256; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
257; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
258; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
259; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
260; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
261; CHECK:       pred.load.continue6:
262; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
263; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
264; CHECK-NEXT:    [[TMP40:%.*]] = mul <4 x i32> [[VEC_PHI]], [[TMP38]]
265; CHECK-NEXT:    [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP39]]
266; CHECK-NEXT:    [[TMP42]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP41]], <4 x i32> [[VEC_PHI]]
267; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
268; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
269; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
270; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
271; CHECK:       middle.block:
272; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP42]])
273; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
274; CHECK:       scalar.ph:
275; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
276; CHECK:       .lr.ph:
277; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
278; CHECK:       ._crit_edge:
279; CHECK-NEXT:    [[PROD_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ]
280; CHECK-NEXT:    ret i32 [[PROD_0_LCSSA]]
281;
282entry:
283  br label %.lr.ph
284
285.lr.ph:                                           ; preds = %entry, %.lr.ph
  ; Induction variable starts at 0; the running product starts at 1.
286  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
287  %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
  ; Load A[i] and B[i].
288  %l2 = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
289  %l3 = load i32, ptr %l2, align 4
290  %l4 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
291  %l5 = load i32, ptr %l4, align 4
  ; prod = (prod * A[i]) * B[i]
292  %l8 = mul i32 %prod.02, %l3
293  %l9 = mul i32 %l8, %l5
294  %indvars.iv.next = add i32 %indvars.iv, 1
  ; Leave the loop once the incremented index equals 257 (257 iterations).
295  %exitcond = icmp eq i32 %indvars.iv.next, 257
296  br i1 %exitcond, label %._crit_edge, label %.lr.ph
297
298._crit_edge:                                      ; preds = %.lr.ph
  ; Single-input phi that carries the final product out of the loop.
299  %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
300  ret i32 %prod.0.lcssa
301}
302
; Bitwise-and reduction over two arrays: each of the 257 iterations performs
; result &= A[i] & B[i], seeded with -1 (all bits set).
; Unlike the functions above, the pointer arguments here are not marked noalias.
; In the expected output below the vector accumulator starts as splat(-1), the
; two loaded vectors are and-ed together first, inactive lanes of that value
; are replaced by -1 through a select on the tail mask (`vec.ind u< 257`), and
; the result is then and-ed into the accumulator.
303define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
304; CHECK-LABEL: @reduction_and(
305; CHECK-NEXT:  entry:
306; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
307; CHECK:       vector.ph:
308; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
309; CHECK:       vector.body:
310; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
311; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
312; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
313; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
314; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
315; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
316; CHECK:       pred.load.if:
317; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
318; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
319; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
320; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
321; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
322; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
323; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
324; CHECK:       pred.load.continue:
325; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
326; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
327; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
328; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
329; CHECK:       pred.load.if1:
330; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i32 [[INDEX]], 1
331; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
332; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
333; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
334; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
335; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
336; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
337; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
338; CHECK:       pred.load.continue2:
339; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
340; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
341; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
342; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
343; CHECK:       pred.load.if3:
344; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i32 [[INDEX]], 2
345; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
346; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
347; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
348; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
349; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
350; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
351; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
352; CHECK:       pred.load.continue4:
353; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
354; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
355; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
356; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
357; CHECK:       pred.load.if5:
358; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i32 [[INDEX]], 3
359; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
360; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
361; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
362; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
363; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
364; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
365; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
366; CHECK:       pred.load.continue6:
367; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
368; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
369; CHECK-NEXT:    [[TMP40:%.*]] = and <4 x i32> [[TMP38]], [[TMP39]]
370; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> splat (i32 -1)
371; CHECK-NEXT:    [[TMP42]] = and <4 x i32> [[VEC_PHI]], [[TMP41]]
372; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
373; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
374; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
375; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
376; CHECK:       middle.block:
377; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP42]])
378; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
379; CHECK:       scalar.ph:
380; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
381; CHECK:       for.body:
382; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
383; CHECK:       for.end:
384; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ]
385; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
386;
387entry:
388  br label %for.body
389
390for.body:                                         ; preds = %entry, %for.body
  ; Induction variable starts at 0; the running result starts at -1.
391  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
392  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
  ; Load A[i] and B[i].
393  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
394  %l0 = load i32, ptr %arrayidx, align 4
395  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
396  %l1 = load i32, ptr %arrayidx2, align 4
  ; result = (result & A[i]) & B[i]. Despite its name, %add is an `and`.
397  %add = and i32 %result.08, %l0
398  %and = and i32 %add, %l1
399  %indvars.iv.next = add i32 %indvars.iv, 1
  ; Leave the loop once the incremented index equals 257 (257 iterations).
400  %exitcond = icmp eq i32 %indvars.iv.next, 257
401  br i1 %exitcond, label %for.end, label %for.body
402
403for.end:                                          ; preds = %for.body
  ; Single-input phi that carries the final result out of the loop.
404  %result.0.lcssa = phi i32 [ %and, %for.body ]
405  ret i32 %result.0.lcssa
406}
407
408define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
409; CHECK-LABEL: @reduction_or(
410; CHECK-NEXT:  entry:
411; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
412; CHECK:       vector.ph:
413; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
414; CHECK:       vector.body:
415; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
416; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
417; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
418; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
419; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
420; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
421; CHECK:       pred.load.if:
422; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
423; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
424; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
425; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
426; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
427; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
428; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
429; CHECK:       pred.load.continue:
430; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
431; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
432; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
433; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
434; CHECK:       pred.load.if1:
435; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i32 [[INDEX]], 1
436; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
437; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
438; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
439; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
440; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
441; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
442; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
443; CHECK:       pred.load.continue2:
444; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
445; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
446; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
447; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
448; CHECK:       pred.load.if3:
449; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i32 [[INDEX]], 2
450; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
451; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
452; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
453; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
454; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
455; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
456; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
457; CHECK:       pred.load.continue4:
458; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
459; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
460; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
461; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
462; CHECK:       pred.load.if5:
463; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i32 [[INDEX]], 3
464; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
465; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
466; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
467; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
468; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
469; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
470; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
471; CHECK:       pred.load.continue6:
472; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
473; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
474; CHECK-NEXT:    [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]]
475; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
476; CHECK-NEXT:    [[TMP42]] = or <4 x i32> [[VEC_PHI]], [[TMP41]]
477; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
478; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
479; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
480; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
481; CHECK:       middle.block:
482; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP42]])
483; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
484; CHECK:       scalar.ph:
485; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
486; CHECK:       for.body:
487; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
488; CHECK:       for.end:
489; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ]
490; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
491;
492entry:
493  br label %for.body
494
495for.body:                                         ; preds = %entry, %for.body
496  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
497  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
498  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
499  %l0 = load i32, ptr %arrayidx, align 4
500  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
501  %l1 = load i32, ptr %arrayidx2, align 4
502  %add = add nsw i32 %l1, %l0
503  %or = or i32 %add, %result.08
504  %indvars.iv.next = add i32 %indvars.iv, 1
505  %exitcond = icmp eq i32 %indvars.iv.next, 257
506  br i1 %exitcond, label %for.end, label %for.body
507
508for.end:                                          ; preds = %for.body, %entry
509  %result.0.lcssa = phi i32 [ %or, %for.body ]
510  ret i32 %result.0.lcssa
511}
512
; reduction_xor: the scalar loop at the bottom XORs (A[i] + B[i]) into an i32
; accumulator that starts at 0. The autogenerated CHECK lines describe the
; expected vectorized form: a <4 x i32> loop masked by
; `icmp ult VEC_IND, splat (i32 257)`, one predicated scalar load pair per lane,
; a `select` that replaces the sum with zero in inactive lanes before the xor,
; and a final llvm.vector.reduce.xor in middle.block.
513define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
514; CHECK-LABEL: @reduction_xor(
515; CHECK-NEXT:  entry:
516; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
517; CHECK:       vector.ph:
518; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
519; CHECK:       vector.body:
520; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
521; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
522; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
523; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
524; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
525; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
526; CHECK:       pred.load.if:
527; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
528; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
529; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
530; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[INDEX]]
531; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
532; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
533; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
534; CHECK:       pred.load.continue:
535; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
536; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
537; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
538; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
539; CHECK:       pred.load.if1:
540; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i32 [[INDEX]], 1
541; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
542; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
543; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP13]], i64 1
544; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP11]]
545; CHECK-NEXT:    [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
546; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP16]], i64 1
547; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
548; CHECK:       pred.load.continue2:
549; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
550; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
551; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
552; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
553; CHECK:       pred.load.if3:
554; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i32 [[INDEX]], 2
555; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP21]]
556; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
557; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP23]], i64 2
558; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP21]]
559; CHECK-NEXT:    [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
560; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP26]], i64 2
561; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
562; CHECK:       pred.load.continue4:
563; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
564; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
565; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
566; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
567; CHECK:       pred.load.if5:
568; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i32 [[INDEX]], 3
569; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP31]]
570; CHECK-NEXT:    [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
571; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP33]], i64 3
572; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[TMP31]]
573; CHECK-NEXT:    [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4
574; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP36]], i64 3
575; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
576; CHECK:       pred.load.continue6:
577; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
578; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
579; CHECK-NEXT:    [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], [[TMP38]]
580; CHECK-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP40]], <4 x i32> zeroinitializer
581; CHECK-NEXT:    [[TMP42]] = xor <4 x i32> [[VEC_PHI]], [[TMP41]]
582; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
583; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
584; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
585; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
586; CHECK:       middle.block:
587; CHECK-NEXT:    [[TMP44:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP42]])
588; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
589; CHECK:       scalar.ph:
590; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
591; CHECK:       for.body:
592; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
593; CHECK:       for.end:
594; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ]
595; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
596;
597entry:
598  br label %for.body
599
600for.body:                                         ; preds = %entry, %for.body
601  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
602  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
603  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
604  %l0 = load i32, ptr %arrayidx, align 4
605  %arrayidx2 = getelementptr inbounds i32, ptr %B, i32 %indvars.iv
606  %l1 = load i32, ptr %arrayidx2, align 4
607  %add = add nsw i32 %l1, %l0
608  %xor = xor i32 %add, %result.08
609  %indvars.iv.next = add i32 %indvars.iv, 1
; The loop leaves once the incremented index reaches 257; the CHECK lines above
; expect the vector loop to exit at index 260 with the tail handled by the lane mask.
610  %exitcond = icmp eq i32 %indvars.iv.next, 257
611  br i1 %exitcond, label %for.end, label %for.body
612
613for.end:                                          ; preds = %for.body
614  %result.0.lcssa = phi i32 [ %xor, %for.body ]
615  ret i32 %result.0.lcssa
616}
617
; reduction_fadd: the scalar loop at the bottom accumulates A[i] and B[i] into a
; float that starts at 0.0, using `fadd fast`. The autogenerated CHECK lines
; describe the expected vectorized form: a <4 x float> loop masked by
; `icmp ult VEC_IND, splat (i32 257)`, one predicated scalar load pair per lane,
; two vector fadds on the accumulator phi followed by a `select fast` that keeps
; the previous accumulator value in inactive lanes, and a final
; llvm.vector.reduce.fadd in middle.block.
618define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
619; CHECK-LABEL: @reduction_fadd(
620; CHECK-NEXT:  entry:
621; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
622; CHECK:       vector.ph:
623; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
624; CHECK:       vector.body:
625; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
626; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
627; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
628; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
629; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
630; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
631; CHECK:       pred.load.if:
632; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 [[INDEX]]
633; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
634; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
635; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 [[INDEX]]
636; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
637; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
638; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
639; CHECK:       pred.load.continue:
640; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
641; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
642; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
643; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
644; CHECK:       pred.load.if1:
645; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i32 [[INDEX]], 1
646; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP11]]
647; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
648; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
649; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP11]]
650; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
651; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
652; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
653; CHECK:       pred.load.continue2:
654; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
655; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
656; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
657; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
658; CHECK:       pred.load.if3:
659; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i32 [[INDEX]], 2
660; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP21]]
661; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
662; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
663; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP21]]
664; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
665; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
666; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
667; CHECK:       pred.load.continue4:
668; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
669; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
670; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
671; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
672; CHECK:       pred.load.if5:
673; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i32 [[INDEX]], 3
674; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP31]]
675; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
676; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
677; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP31]]
678; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
679; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
680; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
681; CHECK:       pred.load.continue6:
682; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
683; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
684; CHECK-NEXT:    [[TMP40:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP38]]
685; CHECK-NEXT:    [[TMP41:%.*]] = fadd fast <4 x float> [[TMP40]], [[TMP39]]
686; CHECK-NEXT:    [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]]
687; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
688; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
689; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
690; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
691; CHECK:       middle.block:
692; CHECK-NEXT:    [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP42]])
693; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
694; CHECK:       scalar.ph:
695; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
696; CHECK:       for.body:
697; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
698; CHECK:       for.end:
699; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ]
700; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
701;
702entry:
703  br label %for.body
704
705for.body:                                         ; preds = %entry, %for.body
706  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
707  %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
708  %arrayidx = getelementptr inbounds float, ptr %A, i32 %indvars.iv
709  %l0 = load float, ptr %arrayidx, align 4
710  %arrayidx2 = getelementptr inbounds float, ptr %B, i32 %indvars.iv
711  %l1 = load float, ptr %arrayidx2, align 4
; Both loaded values are folded into the running sum: first %l0, then %l1.
712  %add = fadd fast float %result.08, %l0
713  %fadd = fadd fast float %add, %l1
714  %indvars.iv.next = add i32 %indvars.iv, 1
; The loop leaves once the incremented index reaches 257; the CHECK lines above
; expect the vector loop to exit at index 260 with the tail handled by the lane mask.
715  %exitcond = icmp eq i32 %indvars.iv.next, 257
716  br i1 %exitcond, label %for.end, label %for.body
717
718for.end:                                          ; preds = %for.body
719  %result.0.lcssa = phi float [ %fadd, %for.body ]
720  ret float %result.0.lcssa
721}
722
; reduction_fmul: the scalar loop at the bottom multiplies A[i] and B[i] into a
; float accumulator using `fmul fast`. The accumulator's start value is 0.0, and
; the autogenerated CHECK lines expect that value in lane 0 of the vector phi
; with 1.0 in the remaining lanes. The expected vectorized form is a
; <4 x float> loop masked by `icmp ult VEC_IND, splat (i32 257)`, one predicated
; scalar load pair per lane, two vector fmuls followed by a `select fast` that
; keeps the previous accumulator value in inactive lanes, and a final
; llvm.vector.reduce.fmul (start value 1.0) in middle.block.
723define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
724; CHECK-LABEL: @reduction_fmul(
725; CHECK-NEXT:  entry:
726; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
727; CHECK:       vector.ph:
728; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
729; CHECK:       vector.body:
730; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
731; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
732; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_LOAD_CONTINUE6]] ]
733; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
734; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
735; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
736; CHECK:       pred.load.if:
737; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 [[INDEX]]
738; CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4
739; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP3]], i64 0
740; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 [[INDEX]]
741; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[TMP5]], align 4
742; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0
743; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
744; CHECK:       pred.load.continue:
745; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
746; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ]
747; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
748; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
749; CHECK:       pred.load.if1:
750; CHECK-NEXT:    [[TMP11:%.*]] = or disjoint i32 [[INDEX]], 1
751; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP11]]
752; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[TMP12]], align 4
753; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x float> [[TMP8]], float [[TMP13]], i64 1
754; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP11]]
755; CHECK-NEXT:    [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4
756; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP16]], i64 1
757; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
758; CHECK:       pred.load.continue2:
759; CHECK-NEXT:    [[TMP18:%.*]] = phi <4 x float> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ]
760; CHECK-NEXT:    [[TMP19:%.*]] = phi <4 x float> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], [[PRED_LOAD_IF1]] ]
761; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
762; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
763; CHECK:       pred.load.if3:
764; CHECK-NEXT:    [[TMP21:%.*]] = or disjoint i32 [[INDEX]], 2
765; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP21]]
766; CHECK-NEXT:    [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4
767; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP18]], float [[TMP23]], i64 2
768; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP21]]
769; CHECK-NEXT:    [[TMP26:%.*]] = load float, ptr [[TMP25]], align 4
770; CHECK-NEXT:    [[TMP27:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP26]], i64 2
771; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
772; CHECK:       pred.load.continue4:
773; CHECK-NEXT:    [[TMP28:%.*]] = phi <4 x float> [ [[TMP18]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP24]], [[PRED_LOAD_IF3]] ]
774; CHECK-NEXT:    [[TMP29:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP27]], [[PRED_LOAD_IF3]] ]
775; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
776; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
777; CHECK:       pred.load.if5:
778; CHECK-NEXT:    [[TMP31:%.*]] = or disjoint i32 [[INDEX]], 3
779; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i32 [[TMP31]]
780; CHECK-NEXT:    [[TMP33:%.*]] = load float, ptr [[TMP32]], align 4
781; CHECK-NEXT:    [[TMP34:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP33]], i64 3
782; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr inbounds float, ptr [[B]], i32 [[TMP31]]
783; CHECK-NEXT:    [[TMP36:%.*]] = load float, ptr [[TMP35]], align 4
784; CHECK-NEXT:    [[TMP37:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP36]], i64 3
785; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
786; CHECK:       pred.load.continue6:
787; CHECK-NEXT:    [[TMP38:%.*]] = phi <4 x float> [ [[TMP28]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ]
788; CHECK-NEXT:    [[TMP39:%.*]] = phi <4 x float> [ [[TMP29]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP37]], [[PRED_LOAD_IF5]] ]
789; CHECK-NEXT:    [[TMP40:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[TMP38]]
790; CHECK-NEXT:    [[TMP41:%.*]] = fmul fast <4 x float> [[TMP40]], [[TMP39]]
791; CHECK-NEXT:    [[TMP42]] = select fast <4 x i1> [[TMP0]], <4 x float> [[TMP41]], <4 x float> [[VEC_PHI]]
792; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
793; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
794; CHECK-NEXT:    [[TMP43:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
795; CHECK-NEXT:    br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
796; CHECK:       middle.block:
797; CHECK-NEXT:    [[TMP44:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP42]])
798; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
799; CHECK:       scalar.ph:
800; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
801; CHECK:       for.body:
802; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
803; CHECK:       for.end:
804; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP44]], [[MIDDLE_BLOCK]] ]
805; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
806;
807entry:
808  br label %for.body
809
810for.body:                                         ; preds = %entry, %for.body
811  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
812  %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
813  %arrayidx = getelementptr inbounds float, ptr %A, i32 %indvars.iv
814  %l0 = load float, ptr %arrayidx, align 4
815  %arrayidx2 = getelementptr inbounds float, ptr %B, i32 %indvars.iv
816  %l1 = load float, ptr %arrayidx2, align 4
; Note: despite its name, %add is a product (accumulator * %l0), not a sum.
817  %add = fmul fast float %result.08, %l0
818  %fmul = fmul fast float %add, %l1
819  %indvars.iv.next = add i32 %indvars.iv, 1
; The loop leaves once the incremented index reaches 257; the CHECK lines above
; expect the vector loop to exit at index 260 with the tail handled by the lane mask.
820  %exitcond = icmp eq i32 %indvars.iv.next, 257
821  br i1 %exitcond, label %for.end, label %for.body
822
823for.end:                                          ; preds = %for.body
824  %result.0.lcssa = phi float [ %fmul, %for.body ]
825  ret float %result.0.lcssa
826}
827
; reduction_min: the scalar loop at the bottom keeps the signed minimum of A[i]
; and an accumulator that starts at 1000, written as `icmp slt` + `select`.
; %B is declared but never read in the body. The autogenerated CHECK lines
; describe the expected vectorized form: a <4 x i32> loop masked by
; `icmp ult VEC_IND, splat (i32 257)`, one predicated scalar load per lane, an
; llvm.smin call followed by a `select` that keeps the previous accumulator
; value in inactive lanes, and a final llvm.vector.reduce.smin in middle.block.
828define i32 @reduction_min(ptr nocapture %A, ptr nocapture %B) {
829; CHECK-LABEL: @reduction_min(
830; CHECK-NEXT:  entry:
831; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
832; CHECK:       vector.ph:
833; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
834; CHECK:       vector.body:
835; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
836; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
837; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
838; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
839; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
840; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
841; CHECK:       pred.load.if:
842; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
843; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
844; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
845; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
846; CHECK:       pred.load.continue:
847; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
848; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
849; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
850; CHECK:       pred.load.if1:
851; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1
852; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP7]]
853; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
854; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
855; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
856; CHECK:       pred.load.continue2:
857; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
858; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
859; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
860; CHECK:       pred.load.if3:
861; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i32 [[INDEX]], 2
862; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]]
863; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
864; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
865; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
866; CHECK:       pred.load.continue4:
867; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
868; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
869; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
870; CHECK:       pred.load.if5:
871; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i32 [[INDEX]], 3
872; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP19]]
873; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
874; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
875; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
876; CHECK:       pred.load.continue6:
877; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
878; CHECK-NEXT:    [[TMP24:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
879; CHECK-NEXT:    [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
880; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
881; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
882; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
883; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
884; CHECK:       middle.block:
885; CHECK-NEXT:    [[TMP27:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP25]])
886; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
887; CHECK:       scalar.ph:
888; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
889; CHECK:       for.body:
890; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
891; CHECK:       for.end:
892; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
893; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
894;
895entry:
896  br label %for.body
897
898for.body:                                         ; preds = %entry, %for.body
899  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
900  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
901  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
902  %l0 = load i32, ptr %arrayidx, align 4
; Signed-min idiom: keep the accumulator when it is less than the loaded value,
; otherwise take the loaded value.
903  %c0 = icmp slt i32 %result.08, %l0
904  %v0 = select i1 %c0, i32 %result.08, i32 %l0
905  %indvars.iv.next = add i32 %indvars.iv, 1
; The loop leaves once the incremented index reaches 257; the CHECK lines above
; expect the vector loop to exit at index 260 with the tail handled by the lane mask.
906  %exitcond = icmp eq i32 %indvars.iv.next, 257
907  br i1 %exitcond, label %for.end, label %for.body
908
909for.end:                                          ; preds = %for.body
910  %result.0.lcssa = phi i32 [ %v0, %for.body ]
911  ret i32 %result.0.lcssa
912}
913
; reduction_max: unsigned-maximum reduction over the 257 i32 elements A[0..256],
; seeded with 1000 (icmp ugt + select in the scalar loop below). %B is not
; referenced by the function body.
; The autogenerated assertions expect a 4-wide vector loop whose tail is folded
; with the lane mask `icmp ult VEC_IND, 257`: each lane's load is individually
; predicated, the update is @llvm.umax, and a select on the same mask keeps the
; previous accumulator for inactive lanes. The vector loop exits at index 260
; and @llvm.vector.reduce.umax produces the final value in middle.block.
914define i32 @reduction_max(ptr nocapture %A, ptr nocapture %B) {
915; CHECK-LABEL: @reduction_max(
916; CHECK-NEXT:  entry:
917; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
918; CHECK:       vector.ph:
919; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
920; CHECK:       vector.body:
921; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
922; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE6]] ]
923; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 1000), [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[PRED_LOAD_CONTINUE6]] ]
924; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult <4 x i32> [[VEC_IND]], splat (i32 257)
925; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i64 0
926; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
927; CHECK:       pred.load.if:
928; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]]
929; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
930; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
931; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
932; CHECK:       pred.load.continue:
933; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_LOAD_IF]] ]
934; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i64 1
935; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
936; CHECK:       pred.load.if1:
937; CHECK-NEXT:    [[TMP7:%.*]] = or disjoint i32 [[INDEX]], 1
938; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP7]]
939; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
940; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP9]], i64 1
941; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
942; CHECK:       pred.load.continue2:
943; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP10]], [[PRED_LOAD_IF1]] ]
944; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP0]], i64 2
945; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
946; CHECK:       pred.load.if3:
947; CHECK-NEXT:    [[TMP13:%.*]] = or disjoint i32 [[INDEX]], 2
948; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP13]]
949; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4
950; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP15]], i64 2
951; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
952; CHECK:       pred.load.continue4:
953; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP16]], [[PRED_LOAD_IF3]] ]
954; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP0]], i64 3
955; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
956; CHECK:       pred.load.if5:
957; CHECK-NEXT:    [[TMP19:%.*]] = or disjoint i32 [[INDEX]], 3
958; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP19]]
959; CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
960; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP21]], i64 3
961; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
962; CHECK:       pred.load.continue6:
963; CHECK-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP22]], [[PRED_LOAD_IF5]] ]
964; CHECK-NEXT:    [[TMP24:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP23]])
965; CHECK-NEXT:    [[TMP25]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
966; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
967; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
968; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], 260
969; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
970; CHECK:       middle.block:
971; CHECK-NEXT:    [[TMP27:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP25]])
972; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
973; CHECK:       scalar.ph:
974; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
975; CHECK:       for.body:
976; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
977; CHECK:       for.end:
978; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
979; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
980;
981entry:
982  br label %for.body
983
984for.body:                                         ; preds = %entry, %for.body
985  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
986  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] ; running maximum, seeded with 1000
987  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %indvars.iv
988  %l0 = load i32, ptr %arrayidx, align 4
989  %c0 = icmp ugt i32 %result.08, %l0 ; unsigned compare: together with the select below this forms a umax
990  %v0 = select i1 %c0, i32 %result.08, i32 %l0
991  %indvars.iv.next = add i32 %indvars.iv, 1
992  %exitcond = icmp eq i32 %indvars.iv.next, 257 ; 257 iterations: not a multiple of the forced vector width 4
993  br i1 %exitcond, label %for.end, label %for.body
994
995for.end:                                          ; preds = %for.body
996  %result.0.lcssa = phi i32 [ %v0, %for.body ]
997  ret i32 %result.0.lcssa
998}
999