xref: /llvm-project/llvm/test/Transforms/LoopVectorize/if-reduction.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
3
4target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
5
6; Float pattern:
7;   Check vectorization of reduction code which has an fadd instruction after
8;   an fcmp instruction which compares an array element and 0.
9;
10; float fcmp_0_fadd_select1(float *restrict x, const int N) {
11;   float sum = 0.
12;   for (int i = 0; i < N; ++i)
13;     if (x[i] > (float)0.)
14;       sum += x[i];
15;   return sum;
16; }
17
18define float @fcmp_0_fadd_select1(ptr noalias %x, i32 %N) nounwind readonly {
19; CHECK-LABEL: define float @fcmp_0_fadd_select1(
20; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
21; CHECK-NEXT:  [[ENTRY:.*]]:
22; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
23; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
24; CHECK:       [[FOR_HEADER]]:
25; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
26; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
27; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
28; CHECK:       [[VECTOR_PH]]:
29; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
30; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
31; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
32; CHECK:       [[VECTOR_BODY]]:
33; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
34; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
35; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
36; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
37; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
38; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
39; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
40; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
41; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
42; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
43; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
44; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
45; CHECK:       [[MIDDLE_BLOCK]]:
46; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
47; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
48; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
49; CHECK:       [[SCALAR_PH]]:
50; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
51; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
52; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
53; CHECK:       [[FOR_BODY]]:
54; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
55; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
56; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
57; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
58; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
59; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP8]], [[SUM_1]]
60; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
61; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
62; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
63; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
64; CHECK:       [[FOR_END_LOOPEXIT]]:
65; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
66; CHECK-NEXT:    br label %[[FOR_END]]
67; CHECK:       [[FOR_END]]:
68; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
69; CHECK-NEXT:    ret float [[TMP9]]
70;
71entry:
72  %cmp.1 = icmp sgt i32 %N, 0                     ; loop is bypassed when N <= 0
73  br i1 %cmp.1, label %for.header, label %for.end
74
75for.header:                                       ; preds = %entry
76  %zext = zext i32 %N to i64                      ; loop bound widened to the i64 induction type
77  br label %for.body
78
79for.body:                                         ; preds = %for.header, %for.body
80  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
81  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
82  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
83  %0 = load float, ptr %arrayidx, align 4
84  %cmp.2 = fcmp fast ogt float %0, 0.000000e+00   ; x[i] > 0.0
85  %add = fadd fast float %0, %sum.1               ; sum + x[i], computed unconditionally
86  %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the old sum unless the compare holds
87  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
88  %exitcond = icmp eq i64 %indvars.iv.next, %zext
89  br i1 %exitcond, label %for.end, label %for.body
90
91for.end:                                          ; preds = %for.body, %entry
92  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was bypassed, else the final sum
93  ret float %1
94}
95
96; Double pattern:
97;   Check vectorization of reduction code which has an fadd instruction after
98;   an fcmp instruction which compares an array element and 0.
99;
100; double fcmp_0_fadd_select2(double *restrict x, const int N) {
101;   double sum = 0.
102;   for (int i = 0; i < N; ++i)
103;     if (x[i] > 0.)
104;       sum += x[i];
105;   return sum;
106; }
107
108define double @fcmp_0_fadd_select2(ptr noalias %x, i32 %N) nounwind readonly {
109; CHECK-LABEL: define double @fcmp_0_fadd_select2(
110; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
111; CHECK-NEXT:  [[ENTRY:.*]]:
112; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
113; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
114; CHECK:       [[FOR_HEADER]]:
115; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
116; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
117; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
118; CHECK:       [[VECTOR_PH]]:
119; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
120; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
121; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
122; CHECK:       [[VECTOR_BODY]]:
123; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
124; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
125; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
126; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
127; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
128; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
129; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
130; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
131; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
132; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
133; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
134; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
135; CHECK:       [[MIDDLE_BLOCK]]:
136; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
137; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
138; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
139; CHECK:       [[SCALAR_PH]]:
140; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
141; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
142; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
143; CHECK:       [[FOR_BODY]]:
144; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
145; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
146; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
147; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
148; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
149; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[TMP8]], [[SUM_1]]
150; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
151; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
152; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
153; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
154; CHECK:       [[FOR_END_LOOPEXIT]]:
155; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
156; CHECK-NEXT:    br label %[[FOR_END]]
157; CHECK:       [[FOR_END]]:
158; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
159; CHECK-NEXT:    ret double [[TMP9]]
160;
161entry:
162  %cmp.1 = icmp sgt i32 %N, 0                     ; loop is bypassed when N <= 0
163  br i1 %cmp.1, label %for.header, label %for.end
164
165for.header:                                       ; preds = %entry
166  %zext = zext i32 %N to i64                      ; loop bound widened to the i64 induction type
167  br label %for.body
168
169for.body:                                         ; preds = %for.header, %for.body
170  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
171  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
172  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
173  %0 = load double, ptr %arrayidx, align 4        ; align 4 is what the CHECK lines expect on the widened load as well
174  %cmp.2 = fcmp fast ogt double %0, 0.000000e+00  ; x[i] > 0.0
175  %add = fadd fast double %0, %sum.1              ; sum + x[i], computed unconditionally
176  %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; keep the old sum unless the compare holds
177  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
178  %exitcond = icmp eq i64 %indvars.iv.next, %zext
179  br i1 %exitcond, label %for.end, label %for.body
180
181for.end:                                          ; preds = %for.body, %entry
182  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was bypassed, else the final sum
183  ret double %1
184}
185
186; Float pattern:
187;   Check vectorization of reduction code which has an fadd instruction after
188;   an fcmp instruction which compares an array element and a floating-point
189;   value.
190;
191; float fcmp_val_fadd_select1(float *restrict x, float y, const int N) {
192;   float sum = 0.
193;   for (int i = 0; i < N; ++i)
194;     if (x[i] > y)
195;       sum += x[i];
196;   return sum;
197; }
198
199define float @fcmp_val_fadd_select1(ptr noalias %x, float %y, i32 %N) nounwind readonly {
200; CHECK-LABEL: define float @fcmp_val_fadd_select1(
201; CHECK-SAME: ptr noalias [[X:%.*]], float [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
202; CHECK-NEXT:  [[ENTRY:.*]]:
203; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
204; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
205; CHECK:       [[FOR_HEADER]]:
206; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
207; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
208; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
209; CHECK:       [[VECTOR_PH]]:
210; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
211; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
212; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y]], i64 0
213; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
214; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
215; CHECK:       [[VECTOR_BODY]]:
216; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
217; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
218; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
219; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
220; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
221; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
222; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
223; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
224; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
225; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
226; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
227; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
228; CHECK:       [[MIDDLE_BLOCK]]:
229; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
230; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
231; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
232; CHECK:       [[SCALAR_PH]]:
233; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
234; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
235; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
236; CHECK:       [[FOR_BODY]]:
237; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
238; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
239; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
240; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
241; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], [[Y]]
242; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP8]], [[SUM_1]]
243; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
244; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
245; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
246; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
247; CHECK:       [[FOR_END_LOOPEXIT]]:
248; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
249; CHECK-NEXT:    br label %[[FOR_END]]
250; CHECK:       [[FOR_END]]:
251; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
252; CHECK-NEXT:    ret float [[TMP9]]
253;
254entry:
255  %cmp.1 = icmp sgt i32 %N, 0                     ; loop is bypassed when N <= 0
256  br i1 %cmp.1, label %for.header, label %for.end
257
258for.header:                                       ; preds = %entry
259  %zext = zext i32 %N to i64                      ; loop bound widened to the i64 induction type
260  br label %for.body
261
262for.body:                                         ; preds = %for.header, %for.body
263  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
264  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
265  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
266  %0 = load float, ptr %arrayidx, align 4
267  %cmp.2 = fcmp fast ogt float %0, %y             ; x[i] > y, with y loop-invariant (a function argument)
268  %add = fadd fast float %0, %sum.1               ; sum + x[i], computed unconditionally
269  %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the old sum unless the compare holds
270  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
271  %exitcond = icmp eq i64 %indvars.iv.next, %zext
272  br i1 %exitcond, label %for.end, label %for.body
273
274for.end:                                          ; preds = %for.body, %entry
275  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was bypassed, else the final sum
276  ret float %1
277}
278
279; Double pattern:
280;   Check vectorization of reduction code which has an fadd instruction after
281;   an fcmp instruction which compares an array element and a floating-point
282;   value.
283;
284; double fcmp_val_fadd_select2(double *restrict x, double y, const int N) {
285;   double sum = 0.
286;   for (int i = 0; i < N; ++i)
287;     if (x[i] > y)
288;       sum += x[i];
289;   return sum;
290; }
291
292define double @fcmp_val_fadd_select2(ptr noalias %x, double %y, i32 %N) nounwind readonly {
293; CHECK-LABEL: define double @fcmp_val_fadd_select2(
294; CHECK-SAME: ptr noalias [[X:%.*]], double [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
295; CHECK-NEXT:  [[ENTRY:.*]]:
296; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
297; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
298; CHECK:       [[FOR_HEADER]]:
299; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
300; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
301; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
302; CHECK:       [[VECTOR_PH]]:
303; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
304; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
305; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0
306; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
307; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
308; CHECK:       [[VECTOR_BODY]]:
309; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
310; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
311; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
312; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
313; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
314; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
315; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
316; CHECK-NEXT:    [[TMP4:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
317; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
318; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
319; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
320; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
321; CHECK:       [[MIDDLE_BLOCK]]:
322; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
323; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
324; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
325; CHECK:       [[SCALAR_PH]]:
326; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
327; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
328; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
329; CHECK:       [[FOR_BODY]]:
330; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
331; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
332; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
333; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
334; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], [[Y]]
335; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[TMP8]], [[SUM_1]]
336; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
337; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
338; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
339; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
340; CHECK:       [[FOR_END_LOOPEXIT]]:
341; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
342; CHECK-NEXT:    br label %[[FOR_END]]
343; CHECK:       [[FOR_END]]:
344; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
345; CHECK-NEXT:    ret double [[TMP9]]
346;
347entry:
348  %cmp.1 = icmp sgt i32 %N, 0                     ; loop is bypassed when N <= 0
349  br i1 %cmp.1, label %for.header, label %for.end
350
351for.header:                                       ; preds = %entry
352  %zext = zext i32 %N to i64                      ; loop bound widened to the i64 induction type
353  br label %for.body
354
355for.body:                                         ; preds = %for.header, %for.body
356  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
357  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
358  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
359  %0 = load double, ptr %arrayidx, align 4        ; align 4 is what the CHECK lines expect on the widened load as well
360  %cmp.2 = fcmp fast ogt double %0, %y            ; x[i] > y, with y loop-invariant (a function argument)
361  %add = fadd fast double %0, %sum.1              ; sum + x[i], computed unconditionally
362  %sum.2 = select i1 %cmp.2, double %add, double %sum.1 ; keep the old sum unless the compare holds
363  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
364  %exitcond = icmp eq i64 %indvars.iv.next, %zext
365  br i1 %exitcond, label %for.end, label %for.body
366
367for.end:                                          ; preds = %for.body, %entry
368  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was bypassed, else the final sum
369  ret double %1
370}
371
372; Float pattern:
373;   Check vectorization of reduction code which has an fadd instruction after
374;   an fcmp instruction which compares an array element and another array
375;   element.
376;
377; float fcmp_array_elm_fadd_select1(float *restrict x, float *restrict y,
378;                                   const int N) {
379;   float sum = 0.
380;   for (int i = 0; i < N; ++i)
381;     if (x[i] > y[i])
382;       sum += x[i];
383;   return sum;
384; }
385
386define float @fcmp_array_elm_fadd_select1(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
387; CHECK-LABEL: define float @fcmp_array_elm_fadd_select1(
388; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
389; CHECK-NEXT:  [[ENTRY:.*]]:
390; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
391; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
392; CHECK:       [[FOR_HEADER]]:
393; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
394; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
395; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
396; CHECK:       [[VECTOR_PH]]:
397; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
398; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
399; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
400; CHECK:       [[VECTOR_BODY]]:
401; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
402; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
403; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
404; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
405; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
406; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
407; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[TMP0]]
408; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
409; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
410; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
411; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
412; CHECK-NEXT:    [[TMP7]] = select <4 x i1> [[TMP5]], <4 x float> [[TMP6]], <4 x float> [[VEC_PHI]]
413; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
414; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
415; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
416; CHECK:       [[MIDDLE_BLOCK]]:
417; CHECK-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP7]])
418; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
419; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
420; CHECK:       [[SCALAR_PH]]:
421; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
422; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
423; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
424; CHECK:       [[FOR_BODY]]:
425; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
426; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
427; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
428; CHECK-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
429; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[INDVARS_IV]]
430; CHECK-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
431; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP10]], [[TMP11]]
432; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP10]], [[SUM_1]]
433; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
434; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
435; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
436; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
437; CHECK:       [[FOR_END_LOOPEXIT]]:
438; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
439; CHECK-NEXT:    br label %[[FOR_END]]
440; CHECK:       [[FOR_END]]:
441; CHECK-NEXT:    [[TMP12:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
442; CHECK-NEXT:    ret float [[TMP12]]
443;
444entry:
445  %cmp.1 = icmp sgt i32 %N, 0                     ; loop is bypassed when N <= 0
446  br i1 %cmp.1, label %for.header, label %for.end
447
448for.header:                                       ; preds = %entry
449  %zext = zext i32 %N to i64                      ; loop bound widened to the i64 induction type
450  br label %for.body
451
452for.body:                                         ; preds = %for.body, %for.header
453  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
454  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ] ; running sum, starts at 0.0
455  %arrayidx.1 = getelementptr inbounds float, ptr %x, i64 %indvars.iv
456  %0 = load float, ptr %arrayidx.1, align 4       ; x[i]
457  %arrayidx.2 = getelementptr inbounds float, ptr %y, i64 %indvars.iv
458  %1 = load float, ptr %arrayidx.2, align 4       ; y[i]
459  %cmp.2 = fcmp fast ogt float %0, %1             ; x[i] > y[i]
460  %add = fadd fast float %0, %sum.1               ; sum + x[i], computed unconditionally
461  %sum.2 = select i1 %cmp.2, float %add, float %sum.1 ; keep the old sum unless the compare holds
462  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
463  %exitcond = icmp eq i64 %indvars.iv.next, %zext
464  br i1 %exitcond, label %for.end, label %for.body
465
466for.end:                                          ; preds = %for.body, %entry
467  %2 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ] ; 0.0 if the loop was bypassed, else the final sum
468  ret float %2
469}
470
471; Double pattern:
472;   Check vectorization of reduction code which has an fadd instruction after
473;   an fcmp instruction which compares an array element and another array
474;   element.
475;
476; double fcmp_array_elm_fadd_select2(double *restrict x, double *restrict y,
477;                                    const int N) {
478;   double sum = 0.
479;   for (int i = 0; i < N; ++i)
480;     if (x[i] > y[i])
481;       sum += x[i];
482;   return sum;
483; }
484
485define double @fcmp_array_elm_fadd_select2(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
486; CHECK-LABEL: define double @fcmp_array_elm_fadd_select2(
487; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
488; CHECK-NEXT:  [[ENTRY:.*]]:
489; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
490; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
491; CHECK:       [[FOR_HEADER]]:
492; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
493; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
494; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
495; CHECK:       [[VECTOR_PH]]:
496; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
497; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
498; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
499; CHECK:       [[VECTOR_BODY]]:
500; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
501; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
502; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
503; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
504; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
505; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
506; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[TMP0]]
507; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0
508; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP4]], align 4
509; CHECK-NEXT:    [[TMP5:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], [[WIDE_LOAD1]]
510; CHECK-NEXT:    [[TMP6:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
511; CHECK-NEXT:    [[TMP7]] = select <4 x i1> [[TMP5]], <4 x double> [[TMP6]], <4 x double> [[VEC_PHI]]
512; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
513; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
514; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
515; CHECK:       [[MIDDLE_BLOCK]]:
516; CHECK-NEXT:    [[TMP9:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP7]])
517; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
518; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
519; CHECK:       [[SCALAR_PH]]:
520; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
521; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
522; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
523; CHECK:       [[FOR_BODY]]:
524; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
525; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
526; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
527; CHECK-NEXT:    [[TMP10:%.*]] = load double, ptr [[ARRAYIDX_1]], align 4
528; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[INDVARS_IV]]
529; CHECK-NEXT:    [[TMP11:%.*]] = load double, ptr [[ARRAYIDX_2]], align 4
530; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP10]], [[TMP11]]
531; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[TMP10]], [[SUM_1]]
532; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
533; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
534; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
535; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
536; CHECK:       [[FOR_END_LOOPEXIT]]:
537; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
538; CHECK-NEXT:    br label %[[FOR_END]]
539; CHECK:       [[FOR_END]]:
540; CHECK-NEXT:    [[TMP12:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
541; CHECK-NEXT:    ret double [[TMP12]]
542;
543entry:
544  %cmp.1 = icmp sgt i32 %N, 0
545  br i1 %cmp.1, label %for.header, label %for.end
546
547for.header:                                       ; preds = %entry
548  %zext = zext i32 %N to i64
549  br label %for.body
550
551for.body:                                         ; preds = %for.body, %for.header
552  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
553  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
554  %arrayidx.1 = getelementptr inbounds double, ptr %x, i64 %indvars.iv
555  %0 = load double, ptr %arrayidx.1, align 4
556  %arrayidx.2 = getelementptr inbounds double, ptr %y, i64 %indvars.iv
557  %1 = load double, ptr %arrayidx.2, align 4
558  %cmp.2 = fcmp fast ogt double %0, %1
559  %add = fadd fast double %0, %sum.1
560  %sum.2 = select i1 %cmp.2, double %add, double %sum.1
561  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
562  %exitcond = icmp eq i64 %indvars.iv.next, %zext
563  br i1 %exitcond, label %for.end, label %for.body
564
565for.end:                                          ; preds = %for.body, %entry
566  %2 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
567  ret double %2
568}
569
570; Float pattern:
571;   Check vectorization of reduction code which has an fsub instruction after
572;   an fcmp instruction which compares an array element and 0.
573;
574; float fcmp_0_fsub_select1(float *restrict x, const int N) {
575;   float sum = 0.
576;   for (int i = 0; i < N; ++i)
577;     if (x[i] > (float)0.)
578;       sum -= x[i];
579;   return sum;
580; }
581
; Fast-math conditional fsub reduction on float. The assertions below expect a
; <4 x float> vector loop whose lanes are combined with
; llvm.vector.reduce.fadd, followed by a scalar loop for the remaining
; iterations.
582define float @fcmp_0_fsub_select1(ptr noalias %x, i32 %N) nounwind readonly {
583; CHECK-LABEL: define float @fcmp_0_fsub_select1(
584; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
585; CHECK-NEXT:  [[ENTRY:.*]]:
586; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
587; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
588; CHECK:       [[FOR_HEADER]]:
589; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
590; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
591; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
592; CHECK:       [[VECTOR_PH]]:
593; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
594; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
595; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
596; CHECK:       [[VECTOR_BODY]]:
597; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
598; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
599; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
600; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
601; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
602; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
603; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
604; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
605; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
606; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
607; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
608; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
609; CHECK:       [[MIDDLE_BLOCK]]:
610; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
611; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
612; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
613; CHECK:       [[SCALAR_PH]]:
614; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
615; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
616; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
617; CHECK:       [[FOR_BODY]]:
618; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
619; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
620; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
621; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
622; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
623; CHECK-NEXT:    [[SUB:%.*]] = fsub fast float [[SUM_1]], [[TMP8]]
624; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[SUB]], float [[SUM_1]]
625; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
626; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
627; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
628; CHECK:       [[FOR_END_LOOPEXIT]]:
629; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
630; CHECK-NEXT:    br label %[[FOR_END]]
631; CHECK:       [[FOR_END]]:
632; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
633; CHECK-NEXT:    ret float [[TMP9]]
634;
635entry:
  ; Bypass the loop entirely when N is not positive.
636  %cmp.1 = icmp sgt i32 %N, 0
637  br i1 %cmp.1, label %for.header, label %for.end
638
639for.header:                                       ; preds = %entry
  ; Widen N to i64 so it can be compared against the i64 induction variable.
640  %zext = zext i32 %N to i64
641  br label %for.body
642
643for.body:                                         ; preds = %for.body, %for.header
644  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; Running reduction value; starts at 0.0 on loop entry.
645  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
646  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
647  %0 = load float, ptr %arrayidx, align 4
  ; Both FP operations carry the 'fast' flag; the _novectorize variant of this
  ; test uses the same loop without it.
648  %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
649  %sub = fsub fast float %sum.1, %0
  ; Take the updated sum only when x[i] > 0; otherwise keep the previous sum.
650  %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
651  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
652  %exitcond = icmp eq i64 %indvars.iv.next, %zext
653  br i1 %exitcond, label %for.end, label %for.body
654
655for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was bypassed, otherwise the final reduction value.
656  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
657  ret float %1
658}
659
660; Float pattern:
661;   Check that the loop is not vectorized when the FP instructions lack fast-math flags.
662; float fcmp_0_fsub_select1_novectorize(float *restrict x, const int N) {
663;   float sum = 0.
664;   for (int i = 0; i < N; ++i)
665;     if (x[i] > (float)0.)
666;       sum -= x[i];
667;   return sum;
668; }
669
; Conditional fsub reduction on float whose fcmp/fsub carry no fast-math flags.
; The assertions below expect the scalar loop to be left in place: no vector
; blocks appear and the back edge has no !llvm.loop metadata.
670define float @fcmp_0_fsub_select1_novectorize(ptr noalias %x, i32 %N) nounwind readonly {
671; CHECK-LABEL: define float @fcmp_0_fsub_select1_novectorize(
672; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
673; CHECK-NEXT:  [[ENTRY:.*]]:
674; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
675; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
676; CHECK:       [[FOR_HEADER]]:
677; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
678; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
679; CHECK:       [[FOR_BODY]]:
680; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
681; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
682; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
683; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
684; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00
685; CHECK-NEXT:    [[SUB:%.*]] = fsub float [[SUM_1]], [[TMP0]]
686; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[SUB]], float [[SUM_1]]
687; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
688; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
689; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
690; CHECK:       [[FOR_END_LOOPEXIT]]:
691; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ]
692; CHECK-NEXT:    br label %[[FOR_END]]
693; CHECK:       [[FOR_END]]:
694; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
695; CHECK-NEXT:    ret float [[TMP1]]
696;
697entry:
  ; Bypass the loop entirely when N is not positive.
698  %cmp.1 = icmp sgt i32 %N, 0
699  br i1 %cmp.1, label %for.header, label %for.end
700
701for.header:                                       ; preds = %entry
  ; Widen N to i64 so it can be compared against the i64 induction variable.
702  %zext = zext i32 %N to i64
703  br label %for.body
704
705for.body:                                         ; preds = %for.body, %for.header
706  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; Running reduction value; starts at 0.0 on loop entry.
707  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
708  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
709  %0 = load float, ptr %arrayidx, align 4
  ; Deliberately no 'fast' flag on the compare or the subtract.
710  %cmp.2 = fcmp ogt float %0, 0.000000e+00
711  %sub = fsub float %sum.1, %0
  ; Take the updated sum only when x[i] > 0; otherwise keep the previous sum.
712  %sum.2 = select i1 %cmp.2, float %sub, float %sum.1
713  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
714  %exitcond = icmp eq i64 %indvars.iv.next, %zext
715  br i1 %exitcond, label %for.end, label %for.body
716
717for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was bypassed, otherwise the final reduction value.
718  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
719  ret float %1
720}
721
722; Double pattern:
723;   Check vectorization of reduction code which has an fsub instruction after
724;   an fcmp instruction which compares an array element and 0.
725;
726; double fcmp_0_fsub_select2(double *restrict x, const int N) {
727;   double sum = 0.
728;   for (int i = 0; i < N; ++i)
729;     if (x[i] > 0.)
730;       sum -= x[i];
731;   return sum;
732; }
733
; Fast-math conditional fsub reduction on double. The assertions below expect a
; <4 x double> vector loop whose lanes are combined with
; llvm.vector.reduce.fadd, followed by a scalar loop for the remaining
; iterations.
734define double @fcmp_0_fsub_select2(ptr noalias %x, i32 %N) nounwind readonly {
735; CHECK-LABEL: define double @fcmp_0_fsub_select2(
736; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
737; CHECK-NEXT:  [[ENTRY:.*]]:
738; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
739; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
740; CHECK:       [[FOR_HEADER]]:
741; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
742; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
743; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
744; CHECK:       [[VECTOR_PH]]:
745; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
746; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
747; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
748; CHECK:       [[VECTOR_BODY]]:
749; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
750; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
751; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
752; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
753; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
754; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
755; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
756; CHECK-NEXT:    [[TMP4:%.*]] = fsub fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]]
757; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
758; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
759; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
760; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
761; CHECK:       [[MIDDLE_BLOCK]]:
762; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
763; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
764; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
765; CHECK:       [[SCALAR_PH]]:
766; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
767; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
768; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
769; CHECK:       [[FOR_BODY]]:
770; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
771; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
772; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
773; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
774; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
775; CHECK-NEXT:    [[SUB:%.*]] = fsub fast double [[SUM_1]], [[TMP8]]
776; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[SUB]], double [[SUM_1]]
777; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
778; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
779; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
780; CHECK:       [[FOR_END_LOOPEXIT]]:
781; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
782; CHECK-NEXT:    br label %[[FOR_END]]
783; CHECK:       [[FOR_END]]:
784; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
785; CHECK-NEXT:    ret double [[TMP9]]
786;
787entry:
  ; Bypass the loop entirely when N is not positive.
788  %cmp.1 = icmp sgt i32 %N, 0
789  br i1 %cmp.1, label %for.header, label %for.end
790
791for.header:                                       ; preds = %entry
  ; Widen N to i64 so it can be compared against the i64 induction variable.
792  %zext = zext i32 %N to i64
793  br label %for.body
794
795for.body:                                         ; preds = %for.body, %for.header
796  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; Running reduction value; starts at 0.0 on loop entry.
797  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
798  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
799  %0 = load double, ptr %arrayidx, align 4
  ; Both FP operations carry the 'fast' flag; the _notvectorize variant of this
  ; test uses the same loop without it.
800  %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
801  %sub = fsub fast double %sum.1, %0
  ; Take the updated sum only when x[i] > 0; otherwise keep the previous sum.
802  %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
803  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
804  %exitcond = icmp eq i64 %indvars.iv.next, %zext
805  br i1 %exitcond, label %for.end, label %for.body
806
807for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was bypassed, otherwise the final reduction value.
808  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
809  ret double %1
810}
811
812; Double pattern:
813;   Check that the loop is not vectorized when the FP instructions lack fast-math flags.
814;
815; double fcmp_0_fsub_select2_notvectorize(double *restrict x, const int N) {
816;   double sum = 0.
817;   for (int i = 0; i < N; ++i)
818;     if (x[i] > 0.)
819;       sum -= x[i];
820;   return sum;
821; }
822
; Conditional fsub reduction on double whose fcmp/fsub carry no fast-math
; flags. The assertions below expect the scalar loop to be left in place: no
; vector blocks appear and the back edge has no !llvm.loop metadata.
823define double @fcmp_0_fsub_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
824; CHECK-LABEL: define double @fcmp_0_fsub_select2_notvectorize(
825; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
826; CHECK-NEXT:  [[ENTRY:.*]]:
827; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
828; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
829; CHECK:       [[FOR_HEADER]]:
830; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
831; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
832; CHECK:       [[FOR_BODY]]:
833; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
834; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
835; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
836; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 4
837; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt double [[TMP0]], 0.000000e+00
838; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[SUM_1]], [[TMP0]]
839; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[SUB]], double [[SUM_1]]
840; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
841; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
842; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
843; CHECK:       [[FOR_END_LOOPEXIT]]:
844; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ]
845; CHECK-NEXT:    br label %[[FOR_END]]
846; CHECK:       [[FOR_END]]:
847; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
848; CHECK-NEXT:    ret double [[TMP1]]
849;
850entry:
  ; Bypass the loop entirely when N is not positive.
851  %cmp.1 = icmp sgt i32 %N, 0
852  br i1 %cmp.1, label %for.header, label %for.end
853
854for.header:                                       ; preds = %entry
  ; Widen N to i64 so it can be compared against the i64 induction variable.
855  %zext = zext i32 %N to i64
856  br label %for.body
857
858for.body:                                         ; preds = %for.body, %for.header
859  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; Running reduction value; starts at 0.0 on loop entry.
860  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
861  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
862  %0 = load double, ptr %arrayidx, align 4
  ; Deliberately no 'fast' flag on the compare or the subtract.
863  %cmp.2 = fcmp ogt double %0, 0.000000e+00
864  %sub = fsub double %sum.1, %0
  ; Take the updated sum only when x[i] > 0; otherwise keep the previous sum.
865  %sum.2 = select i1 %cmp.2, double %sub, double %sum.1
866  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
867  %exitcond = icmp eq i64 %indvars.iv.next, %zext
868  br i1 %exitcond, label %for.end, label %for.body
869
870for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was bypassed, otherwise the final reduction value.
871  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
872  ret double %1
873}
874
875; Float pattern:
876;   Check vectorization of reduction code which has an fmul instruction after
877;   an fcmp instruction which compares an array element and 0.
878;
879; float fcmp_0_fmult_select1(float *restrict x, const int N) {
880;   float sum = 0.
881;   for (int i = 0; i < N; ++i)
882;     if (x[i] > (float)0.)
883;       sum *= x[i];
884;   return sum;
885; }
886
; Fast-math conditional fmul reduction on float. The assertions below expect a
; <4 x float> vector loop whose phi starts at <0.0, 1.0, 1.0, 1.0> (the scalar
; start value in lane 0, 1.0 in the remaining lanes) and whose lanes are
; combined with llvm.vector.reduce.fmul, followed by a scalar loop for the
; remaining iterations.
887define float @fcmp_0_fmult_select1(ptr noalias %x, i32 %N) nounwind readonly {
888; CHECK-LABEL: define float @fcmp_0_fmult_select1(
889; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
890; CHECK-NEXT:  [[ENTRY:.*]]:
891; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
892; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
893; CHECK:       [[FOR_HEADER]]:
894; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
895; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
896; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
897; CHECK:       [[VECTOR_PH]]:
898; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
899; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
900; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
901; CHECK:       [[VECTOR_BODY]]:
902; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
903; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
904; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
905; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
906; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
907; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
908; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
909; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
910; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
911; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
912; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
913; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
914; CHECK:       [[MIDDLE_BLOCK]]:
915; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP5]])
916; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
917; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
918; CHECK:       [[SCALAR_PH]]:
919; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
920; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
921; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
922; CHECK:       [[FOR_BODY]]:
923; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
924; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
925; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
926; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
927; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
928; CHECK-NEXT:    [[MULT:%.*]] = fmul fast float [[SUM_1]], [[TMP8]]
929; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[MULT]], float [[SUM_1]]
930; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
931; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
932; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
933; CHECK:       [[FOR_END_LOOPEXIT]]:
934; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
935; CHECK-NEXT:    br label %[[FOR_END]]
936; CHECK:       [[FOR_END]]:
937; CHECK-NEXT:    [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
938; CHECK-NEXT:    ret float [[TMP9]]
939;
940entry:
  ; Bypass the loop entirely when N is not positive.
941  %cmp.1 = icmp sgt i32 %N, 0
942  br i1 %cmp.1, label %for.header, label %for.end
943
944for.header:                                       ; preds = %entry
  ; Widen N to i64 so it can be compared against the i64 induction variable.
945  %zext = zext i32 %N to i64
946  br label %for.body
947
948for.body:                                         ; preds = %for.body, %for.header
949  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; Running reduction value; starts at 0.0 on loop entry.
950  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
951  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
952  %0 = load float, ptr %arrayidx, align 4
  ; Both FP operations carry the 'fast' flag; the _notvectorize variant of this
  ; test uses the same loop without it.
953  %cmp.2 = fcmp fast ogt float %0, 0.000000e+00
954  %mult = fmul fast float %sum.1, %0
  ; Take the updated product only when x[i] > 0; otherwise keep the previous value.
955  %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
956  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
957  %exitcond = icmp eq i64 %indvars.iv.next, %zext
958  br i1 %exitcond, label %for.end, label %for.body
959
960for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was bypassed, otherwise the final reduction value.
961  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
962  ret float %1
963}
964
965; Float pattern:
966;   Check that the loop is not vectorized when the FP instructions lack fast-math flags.
967;
968; float fcmp_0_fmult_select1_notvectorize(float *restrict x, const int N) {
969;   float sum = 0.
970;   for (int i = 0; i < N; ++i)
971;     if (x[i] > (float)0.)
972;       sum *= x[i];
973;   return sum;
974; }
975
; Conditional fmul reduction on float whose fcmp/fmul carry no fast-math flags.
; The assertions below expect the scalar loop to be left in place: no vector
; blocks appear and the back edge has no !llvm.loop metadata.
976define float @fcmp_0_fmult_select1_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
977; CHECK-LABEL: define float @fcmp_0_fmult_select1_notvectorize(
978; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
979; CHECK-NEXT:  [[ENTRY:.*]]:
980; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
981; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
982; CHECK:       [[FOR_HEADER]]:
983; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
984; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
985; CHECK:       [[FOR_BODY]]:
986; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
987; CHECK-NEXT:    [[SUM_1:%.*]] = phi float [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
988; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
989; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
990; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP0]], 0.000000e+00
991; CHECK-NEXT:    [[MULT:%.*]] = fmul float [[SUM_1]], [[TMP0]]
992; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], float [[MULT]], float [[SUM_1]]
993; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
994; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
995; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
996; CHECK:       [[FOR_END_LOOPEXIT]]:
997; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ]
998; CHECK-NEXT:    br label %[[FOR_END]]
999; CHECK:       [[FOR_END]]:
1000; CHECK-NEXT:    [[TMP1:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1001; CHECK-NEXT:    ret float [[TMP1]]
1002;
1003entry:
  ; Bypass the loop entirely when N is not positive.
1004  %cmp.1 = icmp sgt i32 %N, 0
1005  br i1 %cmp.1, label %for.header, label %for.end
1006
1007for.header:                                       ; preds = %entry
  ; Widen N to i64 so it can be compared against the i64 induction variable.
1008  %zext = zext i32 %N to i64
1009  br label %for.body
1010
1011for.body:                                         ; preds = %for.body, %for.header
1012  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
  ; Running reduction value; starts at 0.0 on loop entry.
1013  %sum.1 = phi float [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
1014  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
1015  %0 = load float, ptr %arrayidx, align 4
  ; Deliberately no 'fast' flag on the compare or the multiply.
1016  %cmp.2 = fcmp ogt float %0, 0.000000e+00
1017  %mult = fmul float %sum.1, %0
  ; Take the updated product only when x[i] > 0; otherwise keep the previous value.
1018  %sum.2 = select i1 %cmp.2, float %mult, float %sum.1
1019  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1020  %exitcond = icmp eq i64 %indvars.iv.next, %zext
1021  br i1 %exitcond, label %for.end, label %for.body
1022
1023for.end:                                          ; preds = %for.body, %entry
  ; 0.0 when the loop was bypassed, otherwise the final reduction value.
1024  %1 = phi float [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
1025  ret float %1
1026}
1027
1028; Double pattern:
1029;   Check vectorization of reduction code which has an fmul instruction after
1030;   an fcmp instruction which compares an array element and 0.
1031;
1032; double fcmp_0_fmult_select2(double *restrict x, const int N) {
1033;   double sum = 0.
1034;   for (int i = 0; i < N; ++i)
1035;     if (x[i] > 0.)
1036;       sum *= x[i];
1037;   return sum;
1038; }
1039
1040define double @fcmp_0_fmult_select2(ptr noalias %x, i32 %N) nounwind readonly {
1041; CHECK-LABEL: define double @fcmp_0_fmult_select2(
1042; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1043; CHECK-NEXT:  [[ENTRY:.*]]:
1044; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
1045; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
1046; CHECK:       [[FOR_HEADER]]:
1047; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
1048; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
1049; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1050; CHECK:       [[VECTOR_PH]]:
1051; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
1052; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
1053; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
1054; CHECK:       [[VECTOR_BODY]]:
1055; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1056; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
1057; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1058; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
1059; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
1060; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
1061; CHECK-NEXT:    [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
1062; CHECK-NEXT:    [[TMP4:%.*]] = fmul fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]]
1063; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
1064; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1065; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1066; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1067; CHECK:       [[MIDDLE_BLOCK]]:
1068; CHECK-NEXT:    [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> [[TMP5]])
1069; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
1070; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
1071; CHECK:       [[SCALAR_PH]]:
1072; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
1073; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
1074; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1075; CHECK:       [[FOR_BODY]]:
1076; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
1077; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
1078; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
1079; CHECK-NEXT:    [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
1080; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
1081; CHECK-NEXT:    [[MULT:%.*]] = fmul fast double [[SUM_1]], [[TMP8]]
1082; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[MULT]], double [[SUM_1]]
1083; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1084; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
1085; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
1086; CHECK:       [[FOR_END_LOOPEXIT]]:
1087; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
1088; CHECK-NEXT:    br label %[[FOR_END]]
1089; CHECK:       [[FOR_END]]:
1090; CHECK-NEXT:    [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1091; CHECK-NEXT:    ret double [[TMP9]]
1092;
1093entry:
1094  %cmp.1 = icmp sgt i32 %N, 0
1095  br i1 %cmp.1, label %for.header, label %for.end
1096
1097for.header:                                       ; preds = %entry
1098  %zext = zext i32 %N to i64
1099  br label %for.body
1100
1101for.body:                                         ; preds = %for.body, %for.header
1102  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
1103  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
1104  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
1105  %0 = load double, ptr %arrayidx, align 4
1106  %cmp.2 = fcmp fast ogt double %0, 0.000000e+00
1107  %mult = fmul fast double %sum.1, %0
1108  %sum.2 = select i1 %cmp.2, double %mult, double %sum.1
1109  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1110  %exitcond = icmp eq i64 %indvars.iv.next, %zext
1111  br i1 %exitcond, label %for.end, label %for.body
1112
1113for.end:                                          ; preds = %for.body, %entry
1114  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
1115  ret double %1
1116}
1117
1118; Double pattern:
1119;   Check that the loop is not vectorized if the fp-instructions have no fast-math flags.
1120;
1121; double fcmp_0_fmult_select2_notvectorize(ptr restrict x, const int N) {
1122;   double sum = 0.
1123;   for (int i = 0; i < N; ++i)
1124;     if (x[i] > 0.)
1125;       sum *= x[i];
1126;   return sum;
1127; }
1128
1129define double @fcmp_0_fmult_select2_notvectorize(ptr noalias %x, i32 %N) nounwind readonly {
1130; CHECK-LABEL: define double @fcmp_0_fmult_select2_notvectorize(
1131; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1132; CHECK-NEXT:  [[ENTRY:.*]]:
1133; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
1134; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
1135; CHECK:       [[FOR_HEADER]]:
1136; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
1137; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1138; CHECK:       [[FOR_BODY]]:
1139; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_HEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
1140; CHECK-NEXT:    [[SUM_1:%.*]] = phi double [ 0.000000e+00, %[[FOR_HEADER]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
1141; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
1142; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 4
1143; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt double [[TMP0]], 0.000000e+00
1144; CHECK-NEXT:    [[MULT:%.*]] = fmul double [[SUM_1]], [[TMP0]]
1145; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], double [[MULT]], double [[SUM_1]]
1146; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1147; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
1148; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
1149; CHECK:       [[FOR_END_LOOPEXIT]]:
1150; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ]
1151; CHECK-NEXT:    br label %[[FOR_END]]
1152; CHECK:       [[FOR_END]]:
1153; CHECK-NEXT:    [[TMP1:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1154; CHECK-NEXT:    ret double [[TMP1]]
1155;
1156entry:
1157  %cmp.1 = icmp sgt i32 %N, 0 ; the loop is skipped entirely when N <= 0
1158  br i1 %cmp.1, label %for.header, label %for.end
1159
1160for.header:                                       ; preds = %entry
1161  %zext = zext i32 %N to i64 ; loop bound widened to i64 for the exit compare
1162  br label %for.body
1163
1164for.body:                                         ; preds = %for.body, %for.header
1165  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
1166  %sum.1 = phi double [ 0.000000e+00, %for.header ], [ %sum.2, %for.body ]
1167  %arrayidx = getelementptr inbounds double, ptr %x, i64 %indvars.iv
1168  %0 = load double, ptr %arrayidx, align 4
1169  %cmp.2 = fcmp ogt double %0, 0.000000e+00 ; no fast-math flags on the compare
1170  %mult = fmul double %sum.1, %0 ; no fast-math flags on the fmul either; this is what the test exercises
1171  %sum.2 = select i1 %cmp.2, double %mult, double %sum.1 ; take the product only when %cmp.2 holds, else keep %sum.1
1172  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1173  %exitcond = icmp eq i64 %indvars.iv.next, %zext
1174  br i1 %exitcond, label %for.end, label %for.body
1175
1176for.end:                                          ; preds = %for.body, %entry
1177  %1 = phi double [ 0.000000e+00, %entry ], [ %sum.2, %for.body ]
1178  ret double %1
1179}
1180
1181; Float multi pattern
1182;   Check vectorisation of reduction code with a pair of selects to different
1183;   fadd patterns.
1184;
1185; float fcmp_multi(ptr a, int n) {
1186;   float sum=0.0;
1187;   for (int i=0;i<n;i++) {
1188;     if (a[i]>1.0)
1189;       sum+=a[i];
1190;     else if (a[i]<3.0)
1191;       sum+=2*a[i];
1192;     else
1193;       sum+=3*a[i];
1194;   }
1195;   return sum;
1196; }
1197
1198define float @fcmp_multi(ptr nocapture readonly %a, i32 %n) nounwind readonly {
1199; CHECK-LABEL: define float @fcmp_multi(
1200; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1201; CHECK-NEXT:  [[ENTRY:.*]]:
1202; CHECK-NEXT:    [[CMP10:%.*]] = icmp sgt i32 [[N]], 0
1203; CHECK-NEXT:    br i1 [[CMP10]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
1204; CHECK:       [[FOR_BODY_PREHEADER]]:
1205; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1206; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
1207; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1208; CHECK:       [[VECTOR_PH]]:
1209; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
1210; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
1211; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
1212; CHECK:       [[VECTOR_BODY]]:
1213; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1214; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
1215; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1216; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
1217; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
1218; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
1219; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
1220; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
1221; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
1222; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
1223; CHECK-NEXT:    [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer
1224; CHECK-NEXT:    [[TMP8:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
1225; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x float> [[WIDE_LOAD]], splat (float 2.000000e+00)
1226; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP7]], <4 x float> [[TMP8]], <4 x float> [[TMP9]]
1227; CHECK-NEXT:    [[PREDPHI1:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[WIDE_LOAD]], <4 x float> [[PREDPHI]]
1228; CHECK-NEXT:    [[TMP10]] = fadd fast <4 x float> [[PREDPHI1]], [[VEC_PHI]]
1229; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1230; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1231; CHECK-NEXT:    br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1232; CHECK:       [[MIDDLE_BLOCK]]:
1233; CHECK-NEXT:    [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP10]])
1234; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
1235; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
1236; CHECK:       [[SCALAR_PH]]:
1237; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
1238; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ]
1239; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1240; CHECK:       [[FOR_BODY]]:
1241; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ]
1242; CHECK-NEXT:    [[SUM_011:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ]
1243; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
1244; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1245; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[TMP13]], 1.000000e+00
1246; CHECK-NEXT:    br i1 [[CMP1]], label %[[FOR_INC]], label %[[IF_ELSE:.*]]
1247; CHECK:       [[IF_ELSE]]:
1248; CHECK-NEXT:    [[CMP8:%.*]] = fcmp olt float [[TMP13]], 3.000000e+00
1249; CHECK-NEXT:    br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[IF_ELSE14:.*]]
1250; CHECK:       [[IF_THEN10]]:
1251; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[TMP13]], 2.000000e+00
1252; CHECK-NEXT:    br label %[[FOR_INC]]
1253; CHECK:       [[IF_ELSE14]]:
1254; CHECK-NEXT:    [[MUL17:%.*]] = fmul fast float [[TMP13]], 3.000000e+00
1255; CHECK-NEXT:    br label %[[FOR_INC]]
1256; CHECK:       [[FOR_INC]]:
1257; CHECK-NEXT:    [[DOTPN:%.*]] = phi float [ [[MUL]], %[[IF_THEN10]] ], [ [[MUL17]], %[[IF_ELSE14]] ], [ [[TMP13]], %[[FOR_BODY]] ]
1258; CHECK-NEXT:    [[SUM_1]] = fadd fast float [[DOTPN]], [[SUM_011]]
1259; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1260; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
1261; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1262; CHECK:       [[FOR_END_LOOPEXIT]]:
1263; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ]
1264; CHECK-NEXT:    br label %[[FOR_END]]
1265; CHECK:       [[FOR_END]]:
1266; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1267; CHECK-NEXT:    ret float [[SUM_0_LCSSA]]
1268;
1269entry:
1270  %cmp10 = icmp sgt i32 %n, 0 ; the loop is skipped entirely when n <= 0
1271  br i1 %cmp10, label %for.body.preheader, label %for.end
1272
1273for.body.preheader:                               ; preds = %entry
1274  %wide.trip.count = zext i32 %n to i64
1275  br label %for.body
1276
1277for.body:                                         ; preds = %for.inc, %for.body.preheader
1278  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
1279  %sum.011 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
1280  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1281  %0 = load float, ptr %arrayidx, align 4
1282  %cmp1 = fcmp ogt float %0, 1.000000e+00 ; a[i] > 1.0: the loaded value itself becomes the addend
1283  br i1 %cmp1, label %for.inc, label %if.else
1284
1285if.else:                                          ; preds = %for.body
1286  %cmp8 = fcmp olt float %0, 3.000000e+00 ; a[i] < 3.0 selects 2*a[i], otherwise 3*a[i]
1287  br i1 %cmp8, label %if.then10, label %if.else14
1288
1289if.then10:                                        ; preds = %if.else
1290  %mul = fmul fast float %0, 2.000000e+00
1291  br label %for.inc
1292
1293if.else14:                                        ; preds = %if.else
1294  %mul17 = fmul fast float %0, 3.000000e+00
1295  br label %for.inc
1296
1297for.inc:                                          ; preds = %for.body, %if.else14, %if.then10
1298  %.pn = phi float [ %mul, %if.then10 ], [ %mul17, %if.else14 ], [ %0, %for.body ] ; addend chosen by the branches above
1299  %sum.1 = fadd fast float %.pn, %sum.011 ; the only update of the accumulator, executed on every path
1300  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1301  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
1302  br i1 %exitcond, label %for.end, label %for.body
1303
1304for.end:                                          ; preds = %for.inc, %entry
1305  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
1306  ret float %sum.0.lcssa
1307}
1308
1309; Float fadd + fsub patterns
1310;   Check vectorisation of reduction code with a pair of selects to different
1311;   instructions { fadd, fsub } but equivalent (change in constant).
1312;
1313; float fcmp_fadd_fsub(ptr a, int n) {
1314;   float sum=0.0;
1315;   for (int i=0;i<n;i++) {
1316;     if (a[i]>1.0)
1317;       sum+=a[i];
1318;     else if (a[i]<3.0)
1319;       sum-=a[i];
1320;   }
1321;   return sum;
1322; }
1323
1324define float @fcmp_fadd_fsub(ptr nocapture readonly %a, i32 %n) nounwind readonly {
1325; CHECK-LABEL: define float @fcmp_fadd_fsub(
1326; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1327; CHECK-NEXT:  [[ENTRY:.*]]:
1328; CHECK-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
1329; CHECK-NEXT:    br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
1330; CHECK:       [[FOR_BODY_PREHEADER]]:
1331; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1332; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], 4
1333; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1334; CHECK:       [[VECTOR_PH]]:
1335; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], 4
1336; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
1337; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
1338; CHECK:       [[VECTOR_BODY]]:
1339; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1340; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI1:%.*]], %[[VECTOR_BODY]] ]
1341; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1342; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP0]]
1343; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
1344; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
1345; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
1346; CHECK-NEXT:    [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true)
1347; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
1348; CHECK-NEXT:    [[TMP6:%.*]] = fsub fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
1349; CHECK-NEXT:    [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
1350; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
1351; CHECK-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer
1352; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP7]], <4 x float> [[TMP6]]
1353; CHECK-NEXT:    [[PREDPHI1]] = select <4 x i1> [[TMP9]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
1354; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1355; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1356; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
1357; CHECK:       [[MIDDLE_BLOCK]]:
1358; CHECK-NEXT:    [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI1]])
1359; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
1360; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
1361; CHECK:       [[SCALAR_PH]]:
1362; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
1363; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP11]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ]
1364; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1365; CHECK:       [[FOR_BODY]]:
1366; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ]
1367; CHECK-NEXT:    [[SUM_010:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ]
1368; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
1369; CHECK-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1370; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[TMP12]], 1.000000e+00
1371; CHECK-NEXT:    br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
1372; CHECK:       [[IF_THEN]]:
1373; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP12]], [[SUM_010]]
1374; CHECK-NEXT:    br label %[[FOR_INC]]
1375; CHECK:       [[IF_ELSE]]:
1376; CHECK-NEXT:    [[CMP8:%.*]] = fcmp olt float [[TMP12]], 3.000000e+00
1377; CHECK-NEXT:    br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[FOR_INC]]
1378; CHECK:       [[IF_THEN10]]:
1379; CHECK-NEXT:    [[SUB:%.*]] = fsub fast float [[SUM_010]], [[TMP12]]
1380; CHECK-NEXT:    br label %[[FOR_INC]]
1381; CHECK:       [[FOR_INC]]:
1382; CHECK-NEXT:    [[SUM_1]] = phi float [ [[ADD]], %[[IF_THEN]] ], [ [[SUB]], %[[IF_THEN10]] ], [ [[SUM_010]], %[[IF_ELSE]] ]
1383; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1384; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
1385; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
1386; CHECK:       [[FOR_END_LOOPEXIT]]:
1387; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ]
1388; CHECK-NEXT:    br label %[[FOR_END]]
1389; CHECK:       [[FOR_END]]:
1390; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1391; CHECK-NEXT:    ret float [[SUM_0_LCSSA]]
1392;
1393entry:
1394  %cmp9 = icmp sgt i32 %n, 0 ; the loop is skipped entirely when n <= 0
1395  br i1 %cmp9, label %for.body.preheader, label %for.end
1396
1397for.body.preheader:                               ; preds = %entry
1398  %wide.trip.count = zext i32 %n to i64
1399  br label %for.body
1400
1401for.body:                                         ; preds = %for.inc, %for.body.preheader
1402  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
1403  %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
1404  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1405  %0 = load float, ptr %arrayidx, align 4
1406  %cmp1 = fcmp ogt float %0, 1.000000e+00
1407  br i1 %cmp1, label %if.then, label %if.else
1408
1409if.then:                                          ; preds = %for.body
1410  %add = fadd fast float %0, %sum.010 ; sum += a[i]
1411  br label %for.inc
1412
1413if.else:                                          ; preds = %for.body
1414  %cmp8 = fcmp olt float %0, 3.000000e+00
1415  br i1 %cmp8, label %if.then10, label %for.inc
1416
1417if.then10:                                        ; preds = %if.else
1418  %sub = fsub fast float %sum.010, %0 ; sum -= a[i]
1419  br label %for.inc
1420
1421for.inc:                                          ; preds = %if.then, %if.then10, %if.else
1422  %sum.1 = phi float [ %add, %if.then ], [ %sub, %if.then10 ], [ %sum.010, %if.else ] ; merges the fadd result, the fsub result and the unchanged sum
1423  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1424  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
1425  br i1 %exitcond, label %for.end, label %for.body
1426
1427for.end:                                          ; preds = %for.inc, %entry
1428  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
1429  ret float %sum.0.lcssa
1430}
1431
1432; Float fadd + fmul patterns
1433;   Check lack of vectorisation of reduction code with a pair of non-compatible
1434;   instructions { fadd, fmul }.
1435;
1436; float fcmp_fadd_fmul(ptr a, int n) {
1437;   float sum=0.0;
1438;   for (int i=0;i<n;i++) {
1439;     if (a[i]>1.0)
1440;       sum+=a[i];
1441;     else if (a[i]<3.0)
1442;       sum*=a[i];
1443;   }
1444;   return sum;
1445; }
1446
1447define float @fcmp_fadd_fmul(ptr nocapture readonly %a, i32 %n) nounwind readonly {
1448; CHECK-LABEL: define float @fcmp_fadd_fmul(
1449; CHECK-SAME: ptr readonly captures(none) [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1450; CHECK-NEXT:  [[ENTRY:.*]]:
1451; CHECK-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N]], 0
1452; CHECK-NEXT:    br i1 [[CMP9]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
1453; CHECK:       [[FOR_BODY_PREHEADER]]:
1454; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
1455; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1456; CHECK:       [[FOR_BODY]]:
1457; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ]
1458; CHECK-NEXT:    [[SUM_010:%.*]] = phi float [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[SUM_1:%.*]], %[[FOR_INC]] ]
1459; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
1460; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1461; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[TMP0]], 1.000000e+00
1462; CHECK-NEXT:    br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_ELSE:.*]]
1463; CHECK:       [[IF_THEN]]:
1464; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP0]], [[SUM_010]]
1465; CHECK-NEXT:    br label %[[FOR_INC]]
1466; CHECK:       [[IF_ELSE]]:
1467; CHECK-NEXT:    [[CMP8:%.*]] = fcmp olt float [[TMP0]], 3.000000e+00
1468; CHECK-NEXT:    br i1 [[CMP8]], label %[[IF_THEN10:.*]], label %[[FOR_INC]]
1469; CHECK:       [[IF_THEN10]]:
1470; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[TMP0]], [[SUM_010]]
1471; CHECK-NEXT:    br label %[[FOR_INC]]
1472; CHECK:       [[FOR_INC]]:
1473; CHECK-NEXT:    [[SUM_1]] = phi float [ [[ADD]], %[[IF_THEN]] ], [ [[MUL]], %[[IF_THEN10]] ], [ [[SUM_010]], %[[IF_ELSE]] ]
1474; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1475; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
1476; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
1477; CHECK:       [[FOR_END_LOOPEXIT]]:
1478; CHECK-NEXT:    [[SUM_1_LCSSA:%.*]] = phi float [ [[SUM_1]], %[[FOR_INC]] ]
1479; CHECK-NEXT:    br label %[[FOR_END]]
1480; CHECK:       [[FOR_END]]:
1481; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_1_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1482; CHECK-NEXT:    ret float [[SUM_0_LCSSA]]
1483;
1484entry:
1485  %cmp9 = icmp sgt i32 %n, 0 ; the loop is skipped entirely when n <= 0
1486  br i1 %cmp9, label %for.body.preheader, label %for.end
1487
1488for.body.preheader:                               ; preds = %entry
1489  %wide.trip.count = zext i32 %n to i64
1490  br label %for.body
1491
1492for.body:                                         ; preds = %for.inc, %for.body.preheader
1493  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.inc ]
1494  %sum.010 = phi float [ 0.000000e+00, %for.body.preheader ], [ %sum.1, %for.inc ]
1495  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1496  %0 = load float, ptr %arrayidx, align 4
1497  %cmp1 = fcmp ogt float %0, 1.000000e+00
1498  br i1 %cmp1, label %if.then, label %if.else
1499
1500if.then:                                          ; preds = %for.body
1501  %add = fadd fast float %0, %sum.010 ; sum += a[i]
1502  br label %for.inc
1503
1504if.else:                                          ; preds = %for.body
1505  %cmp8 = fcmp olt float %0, 3.000000e+00
1506  br i1 %cmp8, label %if.then10, label %for.inc
1507
1508if.then10:                                        ; preds = %if.else
1509  %mul = fmul fast float %0, %sum.010 ; sum *= a[i]
1510  br label %for.inc
1511
1512for.inc:                                          ; preds = %if.then, %if.then10, %if.else
1513  %sum.1 = phi float [ %add, %if.then ], [ %mul, %if.then10 ], [ %sum.010, %if.else ] ; same accumulator updated by fadd on one path and fmul on another
1514  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1515  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
1516  br i1 %exitcond, label %for.end, label %for.body
1517
1518for.end:                                          ; preds = %for.inc, %entry
1519  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %sum.1, %for.inc ]
1520  ret float %sum.0.lcssa
1521}
1522
1523; Float fadd + store patterns
1524;   Check lack of vectorisation of reduction code with a store back, given it
1525;   has loop dependency on a[i].
1526;
1527; float fcmp_store_back(float a[], int LEN) {
1528;     float sum = 0.0;
1529;     for (int i = 0; i < LEN; i++) {
1530;       sum += a[i];
1531;       a[i] = sum;
1532;     }
1533;     return sum;
1534; }
1535
1536define float @fcmp_store_back(ptr nocapture %a, i32 %LEN) nounwind readonly {
1537; CHECK-LABEL: define float @fcmp_store_back(
1538; CHECK-SAME: ptr captures(none) [[A:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
1539; CHECK-NEXT:  [[ENTRY:.*]]:
1540; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[LEN]], 0
1541; CHECK-NEXT:    br i1 [[CMP7]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]]
1542; CHECK:       [[FOR_BODY_PREHEADER]]:
1543; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN]] to i64
1544; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1545; CHECK:       [[FOR_BODY]]:
1546; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
1547; CHECK-NEXT:    [[SUM_08:%.*]] = phi float [ 0.000000e+00, %[[FOR_BODY_PREHEADER]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ]
1548; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
1549; CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1550; CHECK-NEXT:    [[ADD]] = fadd fast float [[TMP0]], [[SUM_08]]
1551; CHECK-NEXT:    store float [[ADD]], ptr [[ARRAYIDX]], align 4
1552; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1553; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
1554; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_BODY]]
1555; CHECK:       [[FOR_END_LOOPEXIT]]:
1556; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[FOR_BODY]] ]
1557; CHECK-NEXT:    br label %[[FOR_END]]
1558; CHECK:       [[FOR_END]]:
1559; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[ADD_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1560; CHECK-NEXT:    ret float [[SUM_0_LCSSA]]
1561;
1562entry:
1563  %cmp7 = icmp sgt i32 %LEN, 0 ; the loop is skipped entirely when LEN <= 0
1564  br i1 %cmp7, label %for.body.preheader, label %for.end
1565
1566for.body.preheader:                               ; preds = %entry
1567  %wide.trip.count = zext i32 %LEN to i64
1568  br label %for.body
1569
1570for.body:                                         ; preds = %for.body, %for.body.preheader
1571  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
1572  %sum.08 = phi float [ 0.000000e+00, %for.body.preheader ], [ %add, %for.body ]
1573  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1574  %0 = load float, ptr %arrayidx, align 4
1575  %add = fadd fast float %0, %sum.08 ; sum += a[i]
1576  store float %add, ptr %arrayidx, align 4 ; a[i] = sum, the running sum is written back each iteration. NOTE(review): the function is declared readonly yet stores through %a; confirm this is intended
1577  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1578  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
1579  br i1 %exitcond, label %for.end, label %for.body
1580
1581for.end:                                          ; preds = %for.body, %entry
1582  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
1583  ret float %sum.0.lcssa
1584}
1585
; Integer pattern:
;   Reduction over an i64 accumulator guarded by a float compare: the
;   accumulator is increased by 2 whenever x[i] > 0. The loop bound %N is
;   already i64, so no zext of the trip count is needed. The autogenerated
;   assertions below expect a vectorized loop on <4 x i64>.
1586define i64 @fcmp_0_add_select2(ptr noalias %x, i64 %N) nounwind readonly {
1587; CHECK-LABEL: define i64 @fcmp_0_add_select2(
1588; CHECK-SAME: ptr noalias [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
1589; CHECK-NEXT:  [[ENTRY:.*]]:
1590; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i64 [[N]], 0
1591; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
1592; CHECK:       [[FOR_HEADER]]:
1593; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
1594; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1595; CHECK:       [[VECTOR_PH]]:
1596; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
1597; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
1598; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
1599; CHECK:       [[VECTOR_BODY]]:
1600; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1601; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
1602; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1603; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
1604; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
1605; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
1606; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
1607; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[VEC_PHI]], splat (i64 2)
1608; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP4]], <4 x i64> [[VEC_PHI]]
1609; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1610; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1611; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
1612; CHECK:       [[MIDDLE_BLOCK]]:
1613; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP5]])
1614; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
1615; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
1616; CHECK:       [[SCALAR_PH]]:
1617; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
1618; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
1619; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1620; CHECK:       [[FOR_BODY]]:
1621; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
1622; CHECK-NEXT:    [[SUM_1:%.*]] = phi i64 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
1623; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
1624; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1625; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP8]], 0.000000e+00
1626; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[SUM_1]], 2
1627; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], i64 [[ADD]], i64 [[SUM_1]]
1628; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1629; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
1630; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
1631; CHECK:       [[FOR_END_LOOPEXIT]]:
1632; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi i64 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
1633; CHECK-NEXT:    br label %[[FOR_END]]
1634; CHECK:       [[FOR_END]]:
1635; CHECK-NEXT:    [[TMP9:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1636; CHECK-NEXT:    ret i64 [[TMP9]]
1637;
1638entry:
1639  %cmp.1 = icmp sgt i64 %N, 0 ; the loop is skipped entirely when N <= 0
1640  br i1 %cmp.1, label %for.header, label %for.end
1641
1642for.header:                                       ; preds = %entry
1643  br label %for.body
1644
1645for.body:                                         ; preds = %for.header, %for.body
1646  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
1647  %sum.1 = phi i64 [ 0, %for.header ], [ %sum.2, %for.body ]
1648  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
1649  %0 = load float, ptr %arrayidx, align 4
1650  %cmp.2 = fcmp ogt float %0, 0.000000e+00
1651  %add = add nsw i64 %sum.1, 2 ; candidate update: accumulator + 2
1652  %sum.2 = select i1 %cmp.2, i64 %add, i64 %sum.1 ; take the update only when %cmp.2 holds, else keep %sum.1
1653  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1654  %exitcond = icmp eq i64 %indvars.iv.next, %N
1655  br i1 %exitcond, label %for.end, label %for.body
1656
1657for.end:                                          ; preds = %for.body, %entry
1658  %1 = phi i64 [ 0, %entry ], [ %sum.2, %for.body ]
1659  ret i64 %1
1660}
1661
1662; FIXME: %indvars.iv.next is poison on the first iteration: %indvars.iv is 0
; there, so `sub nuw nsw i64 %indvars.iv, 1` wraps in the unsigned sense.
;
; Integer pattern with a decrementing induction variable:
;   %sum.1 starts at 0 and has 2 subtracted from it whenever the loaded float
;   compares 'ogt' 0.0; otherwise it is carried over unchanged by the select.
;   %indvars.iv starts at 0 and is decremented by 1 per iteration; the loop
;   exits when the decremented value equals zext(%N).
;   The expected output uses a reversed <4 x float> load and combines the
;   <4 x i32> partial results with llvm.vector.reduce.add.
1663define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly {
1664; CHECK-LABEL: define i32 @fcmp_0_sub_select1(
1665; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1666; CHECK-NEXT:  [[ENTRY:.*]]:
1667; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
1668; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
1669; CHECK:       [[FOR_HEADER]]:
1670; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
1671; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 0, [[ZEXT]]
1672; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1673; CHECK:       [[VECTOR_PH]]:
1674; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
1675; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
1676; CHECK-NEXT:    [[IND_END:%.*]] = sub i64 0, [[N_VEC]]
1677; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
1678; CHECK:       [[VECTOR_BODY]]:
1679; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1680; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
1681; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]]
1682; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
1683; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP1]]
1684; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
1685; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 -3
1686; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
1687; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1688; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer
1689; CHECK-NEXT:    [[TMP6:%.*]] = sub <4 x i32> [[VEC_PHI]], splat (i32 2)
1690; CHECK-NEXT:    [[TMP7]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[VEC_PHI]]
1691; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1692; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1693; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
1694; CHECK:       [[MIDDLE_BLOCK]]:
1695; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP7]])
1696; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
1697; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
1698; CHECK:       [[SCALAR_PH]]:
1699; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
1700; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
1701; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1702; CHECK:       [[FOR_BODY]]:
1703; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
1704; CHECK-NEXT:    [[SUM_1:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
1705; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
1706; CHECK-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1707; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP10]], 0.000000e+00
1708; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[SUM_1]], 2
1709; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], i32 [[SUB]], i32 [[SUM_1]]
1710; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = sub nuw nsw i64 [[INDVARS_IV]], 1
1711; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
1712; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
1713; CHECK:       [[FOR_END_LOOPEXIT]]:
1714; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ]
1715; CHECK-NEXT:    br label %[[FOR_END]]
1716; CHECK:       [[FOR_END]]:
1717; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1718; CHECK-NEXT:    ret i32 [[TMP11]]
1719;
1720entry:
1721  %cmp.1 = icmp sgt i32 %N, 0
1722  br i1 %cmp.1, label %for.header, label %for.end
1723
1724for.header:                                       ; preds = %entry
1725  %zext = zext i32 %N to i64
1726  br label %for.body
1727
1728for.body:                                         ; preds = %for.header, %for.body
1729  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
1730  %sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ]
1731  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
1732  %0 = load float, ptr %arrayidx, align 4
1733  %cmp.2 = fcmp ogt float %0, 0.000000e+00
1734  %sub = sub nsw i32 %sum.1, 2
1735  %sum.2 = select i1 %cmp.2, i32 %sub, i32 %sum.1
1736  %indvars.iv.next = sub nuw nsw i64 %indvars.iv, 1
1737  %exitcond = icmp eq i64 %indvars.iv.next, %zext
1738  br i1 %exitcond, label %for.end, label %for.body
1739
1740for.end:                                          ; preds = %for.body, %entry
1741  %1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
1742  ret i32 %1
1743}
1744
; Integer pattern with a conditional multiply:
;   %sum.1 is multiplied by 2 whenever the loaded float compares 'ogt' 0.0 and
;   is carried over unchanged otherwise; the induction variable counts up from
;   0 to zext(%N). The expected output combines the <4 x i32> partial results
;   with llvm.vector.reduce.mul.
; NOTE(review): %sum.1 starts at 0, so every %sum.2 is 0 regardless of the
;   input data; the expected vector start value is <0, 1, 1, 1>. Confirm that
;   a zero start value is intended for this multiply pattern.
1745define i32 @fcmp_0_mult_select1(ptr noalias %x, i32 %N) nounwind readonly {
1746; CHECK-LABEL: define i32 @fcmp_0_mult_select1(
1747; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
1748; CHECK-NEXT:  [[ENTRY:.*]]:
1749; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
1750; CHECK-NEXT:    br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
1751; CHECK:       [[FOR_HEADER]]:
1752; CHECK-NEXT:    [[ZEXT:%.*]] = zext i32 [[N]] to i64
1753; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
1754; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1755; CHECK:       [[VECTOR_PH]]:
1756; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
1757; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
1758; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
1759; CHECK:       [[VECTOR_BODY]]:
1760; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1761; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 1, i32 1>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
1762; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1763; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
1764; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
1765; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
1766; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
1767; CHECK-NEXT:    [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], splat (i32 2)
1768; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
1769; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1770; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1771; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
1772; CHECK:       [[MIDDLE_BLOCK]]:
1773; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP5]])
1774; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
1775; CHECK-NEXT:    br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
1776; CHECK:       [[SCALAR_PH]]:
1777; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
1778; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
1779; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1780; CHECK:       [[FOR_BODY]]:
1781; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
1782; CHECK-NEXT:    [[SUM_1:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
1783; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
1784; CHECK-NEXT:    [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
1785; CHECK-NEXT:    [[CMP_2:%.*]] = fcmp ogt float [[TMP8]], 0.000000e+00
1786; CHECK-NEXT:    [[MULT:%.*]] = mul nsw i32 [[SUM_1]], 2
1787; CHECK-NEXT:    [[SUM_2]] = select i1 [[CMP_2]], i32 [[MULT]], i32 [[SUM_1]]
1788; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1789; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
1790; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
1791; CHECK:       [[FOR_END_LOOPEXIT]]:
1792; CHECK-NEXT:    [[SUM_2_LCSSA:%.*]] = phi i32 [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
1793; CHECK-NEXT:    br label %[[FOR_END]]
1794; CHECK:       [[FOR_END]]:
1795; CHECK-NEXT:    [[TMP9:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
1796; CHECK-NEXT:    ret i32 [[TMP9]]
1797;
1798entry:
1799  %cmp.1 = icmp sgt i32 %N, 0
1800  br i1 %cmp.1, label %for.header, label %for.end
1801
1802for.header:                                       ; preds = %entry
1803  %zext = zext i32 %N to i64
1804  br label %for.body
1805
1806for.body:                                         ; preds = %for.body, %for.header
1807  %indvars.iv = phi i64 [ 0, %for.header ], [ %indvars.iv.next, %for.body ]
1808  %sum.1 = phi i32 [ 0, %for.header ], [ %sum.2, %for.body ]
1809  %arrayidx = getelementptr inbounds float, ptr %x, i64 %indvars.iv
1810  %0 = load float, ptr %arrayidx, align 4
1811  %cmp.2 = fcmp ogt float %0, 0.000000e+00
1812  %mult = mul nsw i32 %sum.1, 2
1813  %sum.2 = select i1 %cmp.2, i32 %mult, i32 %sum.1
1814  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1815  %exitcond = icmp eq i64 %indvars.iv.next, %zext
1816  br i1 %exitcond, label %for.end, label %for.body
1817
1818for.end:                                          ; preds = %for.body, %entry
1819  %1 = phi i32 [ 0, %entry ], [ %sum.2, %for.body ]
1820  ret i32 %1
1821}
1822
; Constant table of 13 i16 values; @non_reduction_index indexes into it.
1823@table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1
1824
; Negative test: the select stores the decremented loop index (%sub) rather
; than a value computed from the previous %k.04, so %k.04 does not form a
; reduction. The loop walks @table from index 12 downwards, compares each
; element 'ugt' against %val and exits once the decremented index reaches 0.
; The expected output contains only the original scalar loop (no vector body).
1825define i16 @non_reduction_index(i16 noundef %val) {
1826; CHECK-LABEL: define i16 @non_reduction_index(
1827; CHECK-SAME: i16 noundef [[VAL:%.*]]) {
1828; CHECK-NEXT:  [[ENTRY:.*]]:
1829; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1830; CHECK:       [[FOR_COND_CLEANUP:.*]]:
1831; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
1832; CHECK-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
1833; CHECK:       [[FOR_BODY]]:
1834; CHECK-NEXT:    [[I_05:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[SUB:%.*]], %[[FOR_BODY]] ]
1835; CHECK-NEXT:    [[K_04:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
1836; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[I_05]]
1837; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX]], align 1
1838; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[TMP0]], [[VAL]]
1839; CHECK-NEXT:    [[SUB]] = add nsw i16 [[I_05]], -1
1840; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[CMP1]], i16 [[SUB]], i16 [[K_04]]
1841; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i16 [[SUB]], 0
1842; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
1843;
1844entry:
1845  br label %for.body
1846
1847for.cond.cleanup:                                 ; preds = %for.body
1848  %spec.select.lcssa = phi i16 [ %spec.select, %for.body ]
1849  ret i16 %spec.select.lcssa
1850
1851for.body:                                         ; preds = %entry, %for.body
1852  %i.05 = phi i16 [ 12, %entry ], [ %sub, %for.body ]
1853  %k.04 = phi i16 [ 0, %entry ], [ %spec.select, %for.body ]
1854  %arrayidx = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 %i.05
1855  %0 = load i16, ptr %arrayidx, align 1
1856  %cmp1 = icmp ugt i16 %0, %val
1857  %sub = add nsw i16 %i.05, -1
1858  %spec.select = select i1 %cmp1, i16 %sub, i16 %k.04
1859  %cmp.not = icmp eq i16 %sub, 0
1860  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1861}
1862
; Constant table of 13 half values. The entries match @table except for
; 4556.0 and 6496.0 (4553 and 6494 in @table) - presumably because those two
; integers are not exactly representable as half; TODO confirm.
1863@tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1
1864
; Floating-point variant of @non_reduction_index: the select stores the
; decremented loop index (%sub) rather than a value computed from the previous
; %k.04, so %k.04 does not form a reduction. The loop walks the half table
; @tablef from index 12 downwards, compares each element 'fcmp ugt' against
; %val and exits once the decremented index reaches 0. The expected output
; contains only the original scalar loop (no vector body).
;
; The element address is computed in @tablef with a [13 x half] source type so
; that the half load reads real half constants, not the i16 bit patterns
; of @table.
1865define i16 @non_reduction_index_half(half noundef %val) {
1866; CHECK-LABEL: define i16 @non_reduction_index_half(
1867; CHECK-SAME: half noundef [[VAL:%.*]]) {
1868; CHECK-NEXT:  [[ENTRY:.*]]:
1869; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1870; CHECK:       [[FOR_COND_CLEANUP:.*]]:
1871; CHECK-NEXT:    [[SPEC_SELECT_LCSSA:%.*]] = phi i16 [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ]
1872; CHECK-NEXT:    ret i16 [[SPEC_SELECT_LCSSA]]
1873; CHECK:       [[FOR_BODY]]:
1874; CHECK-NEXT:    [[I_05:%.*]] = phi i16 [ 12, %[[ENTRY]] ], [ [[SUB:%.*]], %[[FOR_BODY]] ]
1875; CHECK-NEXT:    [[K_04:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT]], %[[FOR_BODY]] ]
1876; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [13 x half], ptr @tablef, i16 0, i16 [[I_05]]
1877; CHECK-NEXT:    [[TMP0:%.*]] = load half, ptr [[ARRAYIDX]], align 1
1878; CHECK-NEXT:    [[FCMP1:%.*]] = fcmp ugt half [[TMP0]], [[VAL]]
1879; CHECK-NEXT:    [[SUB]] = add nsw i16 [[I_05]], -1
1880; CHECK-NEXT:    [[SPEC_SELECT]] = select i1 [[FCMP1]], i16 [[SUB]], i16 [[K_04]]
1881; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i16 [[SUB]], 0
1882; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
1883;
1884entry:
1885  br label %for.body
1886
1887for.cond.cleanup:                                 ; preds = %for.body
1888  %spec.select.lcssa = phi i16 [ %spec.select, %for.body ]
1889  ret i16 %spec.select.lcssa
1890
1891for.body:                                         ; preds = %entry, %for.body
1892  %i.05 = phi i16 [ 12, %entry ], [ %sub, %for.body ]
1893  %k.04 = phi i16 [ 0, %entry ], [ %spec.select, %for.body ]
1894  %arrayidx = getelementptr inbounds [13 x half], ptr @tablef, i16 0, i16 %i.05
1895  %0 = load half, ptr %arrayidx, align 1
1896  %fcmp1 = fcmp ugt half %0, %val
1897  %sub = add nsw i16 %i.05, -1
1898  %spec.select = select i1 %fcmp1, i16 %sub, i16 %k.04
1899  %cmp.not = icmp eq i16 %sub, 0
1900  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1901}
1902
1903;.
1904; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1905; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1906; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1907; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1908; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1909; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
1910; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1911; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1912; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1913; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
1914; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
1915; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
1916; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
1917; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
1918; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
1919; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
1920; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
1921; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
1922; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
1923; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
1924; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
1925; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]}
1926; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
1927; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]}
1928; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
1929; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]}
1930; CHECK: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
1931; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]}
1932; CHECK: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
1933; CHECK: [[LOOP29]] = distinct !{[[LOOP29]], [[META2]], [[META1]]}
1934; CHECK: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]}
1935; CHECK: [[LOOP31]] = distinct !{[[LOOP31]], [[META2]], [[META1]]}
1936;.
1937