; xref: /llvm-project/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll (revision 462cb3cd6cecd0511ecaf0e3ebcaba455ece587d)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Integer add reduction of a single loaded array. With -prefer-inloop-reductions
; the reduction is done in-loop: a scalar phi accumulates the result of
; llvm.vector.reduce.add over each <4 x i32> chunk.
define i32 @reduction_sum_single(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_sum_single(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP2]] = add i32 [[TMP1]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l7 = add i32 %sum.02, %l3
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Add reduction that also folds in the (truncated) induction variable and two
; loaded arrays; each term is reduced in-loop with its own vector.reduce.add.
define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_sum(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
; CHECK-NEXT:    [[TMP7]] = add i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = trunc i64 %indvars.iv to i32
  %l7 = add i32 %sum.02, %l6
  %l8 = add i32 %l7, %l3
  %l9 = add i32 %l8, %l5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Add reduction with a loop-invariant constant term (+3 per iteration); after
; vectorization/instcombine the constant folds to a single "+ 12" per chunk.
define i32 @reduction_sum_const(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_sum_const(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP3]] = add i32 [[TMP2]], 12
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l7 = add i32 %sum.02, %l3
  %l9 = add i32 %l7, 3
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Multiply reduction (start value 1) over the induction variable and two loaded
; arrays; each factor is reduced in-loop with vector.reduce.mul.
define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_prod(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND]])
; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
; CHECK-NEXT:    [[TMP7]] = mul i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[PROD_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[PROD_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = trunc i64 %indvars.iv to i32
  %l7 = mul i32 %prod.02, %l6
  %l8 = mul i32 %l7, %l3
  %l9 = mul i32 %l8, %l5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %prod.0.lcssa
}

; Mixed computation: the per-element mul A[i]*B[i] stays as a wide vector op,
; while the add reduction (of the induction variable and the products) is done
; in-loop via vector.reduce.add.
define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_mix(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT:    [[TMP6]] = add i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = mul nsw i32 %l5, %l3
  %l7 = trunc i64 %indvars.iv to i32
  %l8 = add i32 %sum.02, %l7
  %l9 = add i32 %l8, %l6
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Multiply reduction with a non-identity start value (19) over two loaded
; arrays; reduced in-loop with vector.reduce.mul per operand.
define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 19, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD1]])
; CHECK-NEXT:    [[TMP5]] = mul i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP13:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = mul i32 %sum.02, %l3
  %l7 = mul i32 %l6, %l5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

; Dot-product style add reduction whose accumulator starts at 120 (non-zero);
; the start value seeds the scalar reduction phi directly in-loop.
define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocapture %out) {
; CHECK-LABEL: @start_at_non_zero(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 120, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[COEFF:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT:    [[TMP4]] = add i32 [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %in, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %coeff, i64 %indvars.iv
  %l1 = load i32, ptr %arrayidx2, align 4
  %mul = mul nsw i32 %l1, %l0
  %add = add nsw i32 %mul, %sum.09
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %sum.0.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %sum.0.lcssa
}

; Bitwise AND reduction (start value -1, the AND identity) over two loaded
; arrays; reduced in-loop with vector.reduce.and per operand.
define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_and(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ -1, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD1]])
; CHECK-NEXT:    [[TMP5]] = and i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l1 = load i32, ptr %arrayidx2, align 4
  %add = and i32 %result.08, %l0
  %and = and i32 %add, %l1
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ %and, %for.body ]
  ret i32 %result.0.lcssa
}

; Bitwise OR reduction of A[i]+B[i]; the add stays a wide vector op and the OR
; is reduced in-loop with vector.reduce.or.
define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_or(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT:    [[TMP4]] = or i32 [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %l1, %l0
  %or = or i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ %or, %for.body ]
  ret i32 %result.0.lcssa
}

; Bitwise XOR reduction of A[i]+B[i]; the add stays a wide vector op and the
; XOR is reduced in-loop with vector.reduce.xor.
define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_xor(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[TMP2]])
; CHECK-NEXT:    [[TMP4]] = xor i32 [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %l1, %l0
  %xor = xor i32 %add, %result.08
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ %xor, %for.body ]
  ret i32 %result.0.lcssa
}

; Fast-math float add reduction over two arrays; the ordered fadd form of
; vector.reduce.fadd takes the running scalar as its start operand, so the
; in-loop reduction chains the two calls directly.
define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_fadd(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP3]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[TMP2]], <4 x float> [[WIDE_LOAD1]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ]
  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
  %l0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
  %l1 = load float, ptr %arrayidx2, align 4
  %add = fadd fast float %result.08, %l0
  %fadd = fadd fast float %add, %l1
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi float [ %fadd, %for.body ]
  ret float %result.0.lcssa
}

define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
; CHECK-LABEL: @reduction_fmul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast float [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD1]])
; CHECK-NEXT:    [[TMP5]] = fmul fast float [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi float [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[RESULT_0_LCSSA]]
;
; Input IR: a fast-math fmul reduction that folds both A[i] and B[i] into the
; running product each iteration. With -prefer-inloop-reductions the CHECK
; lines above verify each 4-wide load is collapsed with
; llvm.vector.reduce.fmul inside vector.body and multiplied into a scalar phi.
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
  %l0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
  %l1 = load float, ptr %arrayidx2, align 4
  %add = fmul fast float %result.08, %l0 ; first fold: product *= A[i]
  %fmul = fmul fast float %add, %l1 ; second fold: product *= B[i]
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi float [ %fmul, %for.body ]
  ret float %result.0.lcssa
}
629
; For sub we can create a reduction, but not an in-loop one.
define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_sub_lhs(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[X_0_LCSSA:%.*]] = phi i32 [ poison, [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[X_0_LCSSA]]
;
; Input IR: x -= A[i] reduction. The CHECK lines above verify the reduction
; stays as a <4 x i32> vector phi with a vector sub in the loop, and the
; llvm.vector.reduce.add call appears only in middle.block - i.e. the sub
; reduction is vectorized but NOT turned into an in-loop reduction.
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l0 = load i32, ptr %arrayidx, align 4
  %sub = sub nsw i32 %x.05, %l0 ; reduction phi on the LHS of the sub
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %x.0.lcssa = phi i32 [ %sub, %for.body ]
  ret i32 %x.0.lcssa
}
675
676; Conditional reductions with multi-input phis.
define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
; CHECK-LABEL: @reduction_conditional(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[S:%.*]], i64 0
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT:    [[TMP4:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i1> [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP5]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true)
; CHECK-NEXT:    [[TMP9:%.*]] = and <4 x i1> [[TMP5]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
; CHECK-NEXT:    [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
; CHECK-NEXT:    [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
; CHECK-NEXT:    [[TMP11:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]]
; CHECK-NEXT:    [[PREDPHI3]] = select <4 x i1> [[TMP11]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    br i1 poison, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]]
; CHECK:       if.then8:
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       if.else:
; CHECK-NEXT:    br i1 poison, label [[IF_THEN16:%.*]], label [[FOR_INC]]
; CHECK:       if.then16:
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       for.inc:
; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ poison, [[FOR_INC]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[SUM_1_LCSSA]]
;
; Input IR: conditional fadd reduction whose update sits behind a diamond of
; branches, merged by a multi-input phi in %for.inc. The CHECK lines above
; verify the control flow is flattened into mask computations plus selects,
; the reduction stays a <4 x float> vector phi (seeded with %S in lane 0),
; and the final reduce.fadd happens in middle.block.
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
  %arrayidx = getelementptr inbounds float, ptr %A, i64 %indvars.iv
  %l0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %B, i64 %indvars.iv
  %l1 = load float, ptr %arrayidx2, align 4
  %cmp3 = fcmp ogt float %l0, %l1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:
  %cmp6 = fcmp ogt float %l1, 1.000000e+00
  br i1 %cmp6, label %if.then8, label %if.else

if.then8:
  %add = fadd fast float %sum.033, %l0 ; taken when A[i] > B[i] and B[i] > 1.0
  br label %for.inc

if.else:
  %cmp14 = fcmp ogt float %l0, 2.000000e+00
  br i1 %cmp14, label %if.then16, label %for.inc

if.then16:
  %add19 = fadd fast float %sum.033, %l1 ; taken when A[i] > B[i], B[i] <= 1.0, A[i] > 2.0
  br label %for.inc

for.inc:
  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
  ret float %sum.1.lcssa
}
767
define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_sum_multiuse(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], [[DOTLR_PH]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT:    [[L2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[L3:%.*]] = load i32, ptr [[L2]], align 4
; CHECK-NEXT:    [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT:    [[L7:%.*]] = add i32 [[SUM_02]], [[L6]]
; CHECK-NEXT:    [[L8:%.*]] = add i32 [[L7]], [[L3]]
; CHECK-NEXT:    [[L10]] = add i32 [[L8]], [[SUM_02]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[TMP0:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[TMP0]], 256
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[DOTLR_PH]]
; CHECK:       end:
; CHECK-NEXT:    ret i32 [[L10]]
;
; Input IR: the reduction phi %sum.02 feeds two separate adds in the loop
; (%l7 and %l10). The CHECK lines above contain no vector.body at all - only
; the scalar loop (after dce/instcombine) - showing the loop is left
; unvectorized in this configuration.
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l10, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = trunc i64 %indvars.iv to i32
  %l7 = add i32 %sum.02, %l6 ; first use of the reduction phi
  %l8 = add i32 %l7, %l3
  %l9 = add i32 %l8, %l5 ; dead (unused) - removed by dce in the output
  %l10 = add i32 %l8, %sum.02 ; second use of the reduction phi
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %end, label %.lr.ph

end:
  %f1 = phi i32 [ %l10, %.lr.ph ]
  ret i32 %f1
}
812
813; Predicated loop, cannot (yet) use in-loop reductions.
define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture %B) {
; CHECK-LABEL: @reduction_predicated(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], [[VEC_PHI]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD1]])
; CHECK-NEXT:    [[TMP7]] = add i32 [[TMP6]], [[TMP5]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ poison, [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
;
; Input IR: chained add reduction of i + A[i] + B[i] with loop metadata !6
; (defined elsewhere in this file) attached to the latch branch.
; NOTE(review): the comment above this function says predicated loops cannot
; (yet) use in-loop reductions, but the CHECK lines show reduce.add calls
; inside vector.body - confirm whether that comment is stale when regenerating.
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ]
  %l2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
  %l3 = load i32, ptr %l2, align 4
  %l4 = getelementptr inbounds i32, ptr %B, i64 %indvars.iv
  %l5 = load i32, ptr %l4, align 4
  %l6 = trunc i64 %indvars.iv to i32
  %l7 = add i32 %sum.02, %l6
  %l8 = add i32 %l7, %l3
  %l9 = add i32 %l8, %l5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !6

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}
871
define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_add_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i8> [ <i8 -1, i8 0, i8 0, i8 0>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2]] = add <4 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> [[TMP2]])
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i8 [[SUM_0_LCSSA]]
;
; Input IR: an i32 add reduction whose value is masked to 8 bits each
; iteration and truncated to i8 on exit. The CHECK lines above verify the
; reduction is narrowed to a <4 x i8> vector phi (initialized to 255 in
; lane 0) with the reduce.add call in middle.block rather than in-loop.
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
  %sum.02 = and i32 %sum.02p, 255 ; only the low 8 bits of the sum are live
  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
  %l3 = load i8, ptr %l2, align 4
  %l3e = zext i8 %l3 to i32
  %l9 = add i32 %sum.02, %l3e
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  %ret = trunc i32 %sum.0.lcssa to i8
  ret i8 %ret
}
919
920
define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
; CHECK-LABEL: @reduction_and_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i8> [ splat (i8 -1), [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2]] = and <4 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP4:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> [[TMP2]])
; CHECK-NEXT:    br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    br i1 poison, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop [[LOOP35:![0-9]+]]
; CHECK:       ._crit_edge:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i8 [ poison, [[DOTLR_PH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i8 [[SUM_0_LCSSA]]
;
; Input IR: same shape as @reduction_add_trunc but with an 'and' reduction.
; The CHECK lines above verify narrowing to <4 x i8> with a splat(-1)
; identity start value and reduce.and performed in middle.block.
entry:
  br label %.lr.ph

.lr.ph:                                           ; preds = %entry, %.lr.ph
  %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ]
  %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ]
  %sum.02 = and i32 %sum.02p, 255 ; only the low 8 bits of the accumulator are live
  %l2 = getelementptr inbounds i8, ptr %A, i32 %indvars.iv
  %l3 = load i8, ptr %l2, align 4
  %l3e = zext i8 %l3 to i32
  %l9 = and i32 %sum.02, %l3e
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 256
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph
  %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ]
  %ret = trunc i32 %sum.0.lcssa to i8
  ret i8 %ret
}
968
969; Test case when loop has a call to the llvm.fmuladd intrinsic.
define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @reduction_fmuladd(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP2]])
; CHECK-NEXT:    [[TMP4]] = fadd float [[TMP3]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP6]], float [[TMP7]], float [[SUM_07]])
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret float [[MULADD_LCSSA]]
;
; Input IR: an fmuladd reduction (no fast-math flags) over a runtime trip
; count %n. The CHECK lines above verify the intrinsic is split into a vector
; fmul plus an in-loop reduce.fadd started at -0.0, with resume phis in
; scalar.ph feeding the remainder loop.

entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
  %1 = load float, ptr %arrayidx2, align 4
  %muladd = tail call float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) ; reduction phi is the addend operand
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret float %muladd
}
1032
define float @reduction_fmuladd_recurrence_first_arg(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @reduction_fmuladd_recurrence_first_arg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[SUM_07]], float [[TMP0]], float [[TMP1]])
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret float [[MULADD]]
;
; Input IR: the loop-carried value is the FIRST (multiplied) operand of
; llvm.fmuladd, i.e. sum = fma(sum, A[i], B[i]). The CHECK lines above show
; only the scalar loop - no vector.body - so this recurrence is not treated
; as a vectorizable reduction.

entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
  %1 = load float, ptr %arrayidx2, align 4
  %muladd = tail call float @llvm.fmuladd.f32(float %sum.07, float %0, float %1) ; phi feeds a multiplicand, not the addend
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret float %muladd
}
1070
define float @reduction_fmuladd_recurrence_second_arg(ptr %a, ptr %b, i64 %n) {
; CHECK-LABEL: @reduction_fmuladd_recurrence_second_arg(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[SUM_07:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw float, ptr [[B:%.*]], i64 [[IV]]
; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[MULADD]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[SUM_07]], float [[TMP1]])
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.end:
; CHECK-NEXT:    ret float [[MULADD]]
;
; Input IR: same as the _first_arg variant but the loop-carried value is the
; SECOND (multiplied) operand of llvm.fmuladd. The CHECK lines above again
; show only the scalar loop - the recurrence through a multiplicand is not
; vectorized as a reduction.

entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
  %1 = load float, ptr %arrayidx2, align 4
  %muladd = tail call float @llvm.fmuladd.f32(float %0, float %sum.07, float %1) ; phi feeds a multiplicand, not the addend
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret float %muladd
}
1108
1109; This case was previously failing verification due to the mask for the
1110; reduction being created after the reduction.
define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h, i32 noundef %i) {
; CHECK-LABEL: @predicated_not_dominates_reduction(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[I]], -4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], splat (i8 31)
; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], splat (i8 3)
; CHECK-NEXT:    [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], splat (i8 31)
; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
; CHECK-NEXT:    [[TMP7]] = add i32 [[TMP6]], [[VEC_PHI]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[I]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END7:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY]] ]
; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
; CHECK:       for.body2:
; CHECK-NEXT:    [[A_117:%.*]] = phi i32 [ [[INC6:%.*]], [[FOR_INC5:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[G_016:%.*]] = phi i32 [ [[G_1:%.*]], [[FOR_INC5]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[A_117]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[H]], i64 [[TMP9]]
; CHECK-NEXT:    [[TMP10:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT:    [[TOBOOL3_NOT:%.*]] = icmp eq i8 [[TMP10]], 0
; CHECK-NEXT:    br i1 [[TOBOOL3_NOT]], label [[FOR_INC5]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[TMP11:%.*]] = udiv i8 [[TMP10]], 31
; CHECK-NEXT:    [[TMP12:%.*]] = shl nuw nsw i8 [[TMP11]], 3
; CHECK-NEXT:    [[TMP13:%.*]] = udiv i8 [[TMP12]], 31
; CHECK-NEXT:    [[DIV4:%.*]] = zext nneg i8 [[TMP13]] to i32
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[G_016]], [[DIV4]]
; CHECK-NEXT:    br label [[FOR_INC5]]
; CHECK:       for.inc5:
; CHECK-NEXT:    [[G_1]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[G_016]], [[FOR_BODY2]] ]
; CHECK-NEXT:    [[INC6]] = add nuw nsw i32 [[A_117]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK:       for.end7:
; CHECK-NEXT:    [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], [[FOR_INC5]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i32 [[G_1_LCSSA]]
;
; Input IR: add reduction guarded by a zero-test on the loaded byte (see the
; comment above this function: the reduction's mask was previously created
; after the reduction, failing verification). The CHECK lines above verify
; the guarded update is flattened - the udiv/shl/udiv chain runs
; unconditionally in vector.body followed by an in-loop reduce.add.
entry:
  br label %for.body2

for.body2:                                        ; preds = %entry, %for.inc5
  %a.117 = phi i32 [ %inc6, %for.inc5 ], [ 0, %entry ]
  %g.016 = phi i32 [ %g.1, %for.inc5 ], [ undef, %entry ]
  %arrayidx = getelementptr inbounds i8, ptr %h, i32 %a.117
  %0 = load i8, ptr %arrayidx, align 1
  %tobool3.not = icmp eq i8 %0, 0
  br i1 %tobool3.not, label %for.inc5, label %if.then

if.then:                                          ; preds = %for.body2
  %1 = udiv i8 %0, 31
  %2 = shl nuw nsw i8 %1, 3
  %3 = udiv i8 %2, 31
  %div4 = zext i8 %3 to i32
  %add = add nsw i32 %g.016, %div4 ; reduction update, only on the non-zero path
  br label %for.inc5

for.inc5:                                         ; preds = %for.body2, %if.then
  %g.1 = phi i32 [ %add, %if.then ], [ %g.016, %for.body2 ]
  %inc6 = add nuw nsw i32 %a.117, 1
  %exitcond.not = icmp eq i32 %inc6, %i
  br i1 %exitcond.not, label %for.end7, label %for.body2

for.end7:                                         ; preds = %for.inc5
  %g.1.lcssa = phi i32 [ %g.1, %for.inc5 ]
  ret i32 %g.1.lcssa
}
1194
; Test: integer add reduction (%g) whose update sits in a predicated block
; (if.then) that does NOT dominate the loop latch (for.inc5), and which adds
; the same value twice per taken iteration (%add1 then %add). With
; -prefer-inloop-reductions the vectorizer must mask out the not-taken lanes:
; the CHECK lines below verify each partial sum is computed as
; select(cond, 0, value) fed into llvm.vector.reduce.add, accumulated into a
; scalar PHI. Note the two reduce.add calls mirror the two scalar adds; in the
; scalar remainder loop instcombine has reassociated x+x into shl 1.
; CHECK lines were autogenerated by update_test_checks.py — do not hand-edit.
1195define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef readonly %h, i32 noundef %i) {
1196; CHECK-LABEL: @predicated_not_dominates_reduction_twoadd(
1197; CHECK-NEXT:  entry:
1198; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I:%.*]], 4
1199; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1200; CHECK:       vector.ph:
1201; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[I]], -4
1202; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1203; CHECK:       vector.body:
1204; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1205; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
1206; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
1207; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H:%.*]], i64 [[TMP0]]
1208; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
1209; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
1210; CHECK-NEXT:    [[TMP2:%.*]] = udiv <4 x i8> [[WIDE_LOAD]], splat (i8 31)
1211; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <4 x i8> [[TMP2]], splat (i8 3)
1212; CHECK-NEXT:    [[TMP4:%.*]] = udiv <4 x i8> [[TMP3]], splat (i8 31)
1213; CHECK-NEXT:    [[TMP5:%.*]] = zext nneg <4 x i8> [[TMP4]] to <4 x i32>
1214; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]]
1215; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP6]])
1216; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP7]], [[VEC_PHI]]
1217; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]]
1218; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
1219; CHECK-NEXT:    [[TMP11]] = add i32 [[TMP10]], [[TMP8]]
1220; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1221; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1222; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
1223; CHECK:       middle.block:
1224; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[I]], [[N_VEC]]
1225; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END7:%.*]], label [[SCALAR_PH]]
1226; CHECK:       scalar.ph:
1227; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1228; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ undef, [[ENTRY]] ]
1229; CHECK-NEXT:    br label [[FOR_BODY2:%.*]]
1230; CHECK:       for.body2:
1231; CHECK-NEXT:    [[A_117:%.*]] = phi i32 [ [[INC6:%.*]], [[FOR_INC5:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1232; CHECK-NEXT:    [[G_016:%.*]] = phi i32 [ [[G_1:%.*]], [[FOR_INC5]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
1233; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[A_117]] to i64
1234; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[H]], i64 [[TMP13]]
1235; CHECK-NEXT:    [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
1236; CHECK-NEXT:    [[TOBOOL3_NOT:%.*]] = icmp eq i8 [[TMP14]], 0
1237; CHECK-NEXT:    br i1 [[TOBOOL3_NOT]], label [[FOR_INC5]], label [[IF_THEN:%.*]]
1238; CHECK:       if.then:
1239; CHECK-NEXT:    [[TMP15:%.*]] = udiv i8 [[TMP14]], 31
1240; CHECK-NEXT:    [[TMP16:%.*]] = shl nuw nsw i8 [[TMP15]], 3
1241; CHECK-NEXT:    [[TMP17:%.*]] = udiv i8 [[TMP16]], 31
1242; CHECK-NEXT:    [[TMP18:%.*]] = shl nuw nsw i8 [[TMP17]], 1
1243; CHECK-NEXT:    [[REASS_ADD:%.*]] = zext nneg i8 [[TMP18]] to i32
1244; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[G_016]], [[REASS_ADD]]
1245; CHECK-NEXT:    br label [[FOR_INC5]]
1246; CHECK:       for.inc5:
1247; CHECK-NEXT:    [[G_1]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ [[G_016]], [[FOR_BODY2]] ]
1248; CHECK-NEXT:    [[INC6]] = add nuw nsw i32 [[A_117]], 1
1249; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC6]], [[I]]
1250; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END7]], label [[FOR_BODY2]], !llvm.loop [[LOOP41:![0-9]+]]
1251; CHECK:       for.end7:
1252; CHECK-NEXT:    [[G_1_LCSSA:%.*]] = phi i32 [ [[G_1]], [[FOR_INC5]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
1253; CHECK-NEXT:    ret i32 [[G_1_LCSSA]]
1254;
; Test input IR: loop over %a.117 in [0, %i); the reduction update only runs
; when h[a.117] != 0, so if.then does not dominate the latch for.inc5.
1255entry:
1256  br label %for.body2
1257
1258for.body2:                                        ; preds = %entry, %for.inc5
1259  %a.117 = phi i32 [ %inc6, %for.inc5 ], [ 0, %entry ]
1260  %g.016 = phi i32 [ %g.1, %for.inc5 ], [ undef, %entry ]
1261  %arrayidx = getelementptr inbounds i8, ptr %h, i32 %a.117
1262  %0 = load i8, ptr %arrayidx, align 1
1263  %tobool3.not = icmp eq i8 %0, 0
1264  br i1 %tobool3.not, label %for.inc5, label %if.then
1265
; Predicated block: %div4 is accumulated TWICE (%add1, then %add) — the
; "twoadd" in the test name; the vector body must emit two masked reductions.
1266if.then:                                          ; preds = %for.body2
1267  %1 = udiv i8 %0, 31
1268  %2 = shl nuw nsw i8 %1, 3
1269  %3 = udiv i8 %2, 31
1270  %div4 = zext i8 %3 to i32
1271  %add1 = add nsw i32 %g.016, %div4
1272  %add = add nsw i32 %add1, %div4
1273  br label %for.inc5
1274
; Latch: merge the (possibly unchanged) reduction value and advance the IV.
1275for.inc5:                                         ; preds = %for.body2, %if.then
1276  %g.1 = phi i32 [ %add, %if.then ], [ %g.016, %for.body2 ]
1277  %inc6 = add nuw nsw i32 %a.117, 1
1278  %exitcond.not = icmp eq i32 %inc6, %i
1279  br i1 %exitcond.not, label %for.end7, label %for.body2
1280
1281for.end7:                                         ; preds = %for.inc5
1282  %g.1.lcssa = phi i32 [ %g.1, %for.inc5 ]
1283  ret i32 %g.1.lcssa
1284}
1285
1286%struct.e = type { i32, i32 }
; Test: conditional-count reduction over 1000 iterations where the increment
; block (if.then) is reached along an OR of two conditions: b[g].1 != 0, or
; (b[g].1 == 0 and b[g].0 != 0) — the second load is itself predicated on the
; first compare. The CHECK lines verify the vectorizer scalarizes the guarded
; load of b[g].0 into per-lane pred.load.if/continue blocks, forms the
; combined lane mask with select(!cmp, true, load != 0), and that instcombine
; turns counting the true lanes into bitcast <4 x i1> to i4 + llvm.ctpop.
; CHECK lines were autogenerated by update_test_checks.py — do not hand-edit.
1287define i32 @predicated_or_dominates_reduction(ptr %b) {
1288; CHECK-LABEL: @predicated_or_dominates_reduction(
1289; CHECK-NEXT:  entry:
1290; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1291; CHECK:       vector.ph:
1292; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1293; CHECK:       vector.body:
1294; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ]
1295; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ undef, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_LOAD_CONTINUE6]] ]
1296; CHECK-NEXT:    [[TMP0:%.*]] = or disjoint i32 [[INDEX]], 1
1297; CHECK-NEXT:    [[TMP1:%.*]] = or disjoint i32 [[INDEX]], 2
1298; CHECK-NEXT:    [[TMP2:%.*]] = or disjoint i32 [[INDEX]], 3
1299; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[INDEX]] to i64
1300; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B:%.*]], i64 0, i64 [[TMP3]], i32 1
1301; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP0]] to i64
1302; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i64 0, i64 [[TMP5]], i32 1
1303; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP1]] to i64
1304; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i64 0, i64 [[TMP7]], i32 1
1305; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP2]] to i64
1306; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i64 0, i64 [[TMP9]], i32 1
1307; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4
1308; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4
1309; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4
1310; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP10]], align 4
1311; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i32> poison, i32 [[TMP11]], i64 0
1312; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP12]], i64 1
1313; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP13]], i64 2
1314; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP14]], i64 3
1315; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq <4 x i32> [[TMP18]], zeroinitializer
1316; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP19]], i64 0
1317; CHECK-NEXT:    br i1 [[TMP20]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
1318; CHECK:       pred.load.if:
1319; CHECK-NEXT:    [[TMP21:%.*]] = sext i32 [[INDEX]] to i64
1320; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i64 0, i64 [[TMP21]]
1321; CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
1322; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP23]], i64 0
1323; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
1324; CHECK:       pred.load.continue:
1325; CHECK-NEXT:    [[TMP25:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP24]], [[PRED_LOAD_IF]] ]
1326; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i64 1
1327; CHECK-NEXT:    br i1 [[TMP26]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
1328; CHECK:       pred.load.if1:
1329; CHECK-NEXT:    [[TMP27:%.*]] = sext i32 [[TMP0]] to i64
1330; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i64 0, i64 [[TMP27]]
1331; CHECK-NEXT:    [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
1332; CHECK-NEXT:    [[TMP30:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP29]], i64 1
1333; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE2]]
1334; CHECK:       pred.load.continue2:
1335; CHECK-NEXT:    [[TMP31:%.*]] = phi <4 x i32> [ [[TMP25]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP30]], [[PRED_LOAD_IF1]] ]
1336; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <4 x i1> [[TMP19]], i64 2
1337; CHECK-NEXT:    br i1 [[TMP32]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
1338; CHECK:       pred.load.if3:
1339; CHECK-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP1]] to i64
1340; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i64 0, i64 [[TMP33]]
1341; CHECK-NEXT:    [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
1342; CHECK-NEXT:    [[TMP36:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP35]], i64 2
1343; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE4]]
1344; CHECK:       pred.load.continue4:
1345; CHECK-NEXT:    [[TMP37:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP36]], [[PRED_LOAD_IF3]] ]
1346; CHECK-NEXT:    [[TMP38:%.*]] = extractelement <4 x i1> [[TMP19]], i64 3
1347; CHECK-NEXT:    br i1 [[TMP38]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]]
1348; CHECK:       pred.load.if5:
1349; CHECK-NEXT:    [[TMP39:%.*]] = sext i32 [[TMP2]] to i64
1350; CHECK-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i64 0, i64 [[TMP39]]
1351; CHECK-NEXT:    [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
1352; CHECK-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP41]], i64 3
1353; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE6]]
1354; CHECK:       pred.load.continue6:
1355; CHECK-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP42]], [[PRED_LOAD_IF5]] ]
1356; CHECK-NEXT:    [[TMP44:%.*]] = icmp ne <4 x i32> [[TMP43]], zeroinitializer
1357; CHECK-NEXT:    [[NOT_:%.*]] = xor <4 x i1> [[TMP19]], splat (i1 true)
1358; CHECK-NEXT:    [[DOTNOT7:%.*]] = select <4 x i1> [[NOT_]], <4 x i1> splat (i1 true), <4 x i1> [[TMP44]]
1359; CHECK-NEXT:    [[TMP45:%.*]] = bitcast <4 x i1> [[DOTNOT7]] to i4
1360; CHECK-NEXT:    [[TMP46:%.*]] = call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 [[TMP45]])
1361; CHECK-NEXT:    [[TMP47:%.*]] = zext nneg i4 [[TMP46]] to i32
1362; CHECK-NEXT:    [[TMP48]] = add i32 [[VEC_PHI]], [[TMP47]]
1363; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1364; CHECK-NEXT:    [[TMP49:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
1365; CHECK-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
1366; CHECK:       middle.block:
1367; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
1368; CHECK:       scalar.ph:
1369; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1370; CHECK:       for.cond.cleanup:
1371; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ poison, [[FOR_INC:%.*]] ], [ [[TMP48]], [[MIDDLE_BLOCK]] ]
1372; CHECK-NEXT:    ret i32 [[A_1_LCSSA]]
1373; CHECK:       for.body:
1374; CHECK-NEXT:    br i1 poison, label [[LOR_LHS_FALSE:%.*]], label [[IF_THEN:%.*]]
1375; CHECK:       lor.lhs.false:
1376; CHECK-NEXT:    br i1 poison, label [[FOR_INC]], label [[IF_THEN]]
1377; CHECK:       if.then:
1378; CHECK-NEXT:    br label [[FOR_INC]]
1379; CHECK:       for.inc:
1380; CHECK-NEXT:    br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
1381;
; Test input IR: fixed trip count 1000; %a is incremented when the short-
; circuit "or" condition below reaches if.then (directly from for.body when
; b[g].1 != 0, or via lor.lhs.false when b[g].0 != 0).
1382entry:
1383  br label %for.body
1384
1385for.cond.cleanup:                                 ; preds = %for.inc
1386  %a.1.lcssa = phi i32 [ %a.1, %for.inc ]
1387  ret i32 %a.1.lcssa
1388
1389for.body:                                         ; preds = %entry, %for.inc
1390  %g.09 = phi i32 [ 0, %entry ], [ %inc3, %for.inc ]
1391  %a.08 = phi i32 [ undef, %entry ], [ %a.1, %for.inc ]
1392  %d = getelementptr inbounds [0 x %struct.e], ptr %b, i32 0, i32 %g.09, i32 1
1393  %0 = load i32, ptr %d, align 4
1394  %tobool.not = icmp eq i32 %0, 0
1395  br i1 %tobool.not, label %lor.lhs.false, label %if.then
1396
; Second leg of the "or": this load only executes when b[g].1 == 0, which is
; why the vectorizer must predicate it lane-by-lane (pred.load.if* above).
1397lor.lhs.false:                                    ; preds = %for.body
1398  %arrayidx = getelementptr inbounds [0 x %struct.e], ptr %b, i32 0, i32 %g.09
1399  %1 = load i32, ptr %arrayidx, align 4
1400  %tobool2.not = icmp eq i32 %1, 0
1401  br i1 %tobool2.not, label %for.inc, label %if.then
1402
1403if.then:                                          ; preds = %lor.lhs.false, %for.body
1404  %inc = add nsw i32 %a.08, 1
1405  br label %for.inc
1406
1407for.inc:                                          ; preds = %lor.lhs.false, %if.then
1408  %a.1 = phi i32 [ %inc, %if.then ], [ %a.08, %lor.lhs.false ]
1409  %inc3 = add nuw nsw i32 %g.09, 1
1410  %exitcond.not = icmp eq i32 %inc3, 1000
1411  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
1412}
1413
1414declare float @llvm.fmuladd.f32(float, float, float)
1415
1416!6 = distinct !{!6, !7, !8}
1417!7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
1418!8 = !{!"llvm.loop.vectorize.enable", i1 true}
1419