; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize \
; RUN: -prefer-inloop-reductions \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v,+f -S < %s | FileCheck %s --check-prefix=IF-EVL

; RUN: opt -passes=loop-vectorize \
; RUN: -prefer-inloop-reductions \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
; RUN: -mtriple=riscv64 -mattr=+v,+f -S < %s | FileCheck %s --check-prefix=NO-VP

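; C equivalent of the loop below (a reference sketch, assuming n >= 1; not
; part of the generated checks):
;   int add(int *a, long n, int start) {
;     for (long i = 0; i < n; i++)
;       start += a[i];
;     return start;
;   }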
define i32 @add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-LABEL: @add(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP15]] = add i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[ADD]] = add nsw i32 [[TMP18]], [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[ADD_LCSSA]]
;
; NO-VP-LABEL: @add(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP10]] = add i32 [[TMP9]], [[VEC_PHI]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[ADD]] = add nsw i32 [[TMP12]], [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[ADD_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %add = add nsw i32 %0, %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %add
}

; Mul reduction is not supported for scalable vectors: IF-EVL leaves the loop
; scalar, while NO-VP falls back to fixed-width vectors (VF 4, interleaved by 2).
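; C equivalent of the loop below (a reference sketch, assuming n >= 1):
;   int mul(int *a, long n, int start) {
;     for (long i = 0; i < n; i++)
;       start *= a[i];
;     return start;
;   }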
define i32 @mul(ptr %a, i64 %n, i32 %start) {
; IF-EVL-LABEL: @mul(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[MUL]] = mul nsw i32 [[TMP0]], [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    ret i32 [[MUL_LCSSA]]
;
; NO-VP-LABEL: @mul(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
; NO-VP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
; NO-VP-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
; NO-VP-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
; NO-VP-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP7]] = mul i32 [[TMP6]], [[VEC_PHI]]
; NO-VP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD2]])
; NO-VP-NEXT:    [[TMP9]] = mul i32 [[TMP8]], [[VEC_PHI1]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; NO-VP-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[BIN_RDX:%.*]] = mul i32 [[TMP9]], [[TMP7]]
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[MUL]] = mul nsw i32 [[TMP11]], [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[MUL_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %mul, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %mul = mul nsw i32 %0, %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %mul
}

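; C equivalent of the loop below (a reference sketch, assuming n >= 1):
;   int or_rdx(int *a, long n, int start) {
;     for (long i = 0; i < n; i++)
;       start |= a[i];
;     return start;
;   }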
define i32 @or(ptr %a, i64 %n, i32 %start) {
; IF-EVL-LABEL: @or(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vp.reduce.or.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP15]] = or i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[OR]] = or i32 [[TMP18]], [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[OR_LCSSA]]
;
; NO-VP-LABEL: @or(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP10]] = or i32 [[TMP9]], [[VEC_PHI]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[OR:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[OR]] = or i32 [[TMP12]], [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[OR_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %or, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %or = or i32 %0, %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %or
}

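; C equivalent of the loop below (a reference sketch, assuming n >= 1):
;   int and_rdx(int *a, long n, int start) {
;     for (long i = 0; i < n; i++)
;       start &= a[i];
;     return start;
;   }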
define i32 @and(ptr %a, i64 %n, i32 %start) {
; IF-EVL-LABEL: @and(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vp.reduce.and.nxv4i32(i32 -1, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP15]] = and i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[AND]] = and i32 [[TMP18]], [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[AND_LCSSA]]
;
; NO-VP-LABEL: @and(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP10]] = and i32 [[TMP9]], [[VEC_PHI]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[AND]] = and i32 [[TMP12]], [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[AND_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %and, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %and = and i32 %0, %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %and
}

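; C equivalent of the loop below (a reference sketch, assuming n >= 1):
;   int xor_rdx(int *a, long n, int start) {
;     for (long i = 0; i < n; i++)
;       start ^= a[i];
;     return start;
;   }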
define i32 @xor(ptr %a, i64 %n, i32 %start) {
; IF-EVL-LABEL: @xor(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vp.reduce.xor.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP15]] = xor i32 [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[XOR:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[XOR]] = xor i32 [[TMP18]], [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[XOR_LCSSA]]
;
; NO-VP-LABEL: @xor(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP10]] = xor i32 [[TMP9]], [[VEC_PHI]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[XOR:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[XOR]] = xor i32 [[TMP12]], [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[XOR_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %xor, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %xor = xor i32 %0, %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %xor
}

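; C equivalent of the loop below (a reference sketch, assuming n >= 1):
;   int smin(int *a, long n, int start) {
;     for (long i = 0; i < n; i++)
;       start = a[i] < start ? a[i] : start;
;     return start;
;   }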
define i32 @smin(ptr %a, i64 %n, i32 %start) {
; IF-EVL-LABEL: @smin(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smin.nxv4i32(i32 2147483647, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[TMP17]], [[RDX]]
; IF-EVL-NEXT:    [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[SMIN_LCSSA]]
;
; NO-VP-LABEL: @smin(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smin.i32(i32 [[TMP9]], i32 [[VEC_PHI]])
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMIN:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[TMP11]], [[RDX]]
; NO-VP-NEXT:    [[SMIN]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[SMIN_LCSSA:%.*]] = phi i32 [ [[SMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[SMIN_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %smin, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp.i = icmp slt i32 %0, %rdx
  %smin = select i1 %cmp.i, i32 %0, i32 %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %smin
}

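; C equivalent of the loop below (a reference sketch, assuming n >= 1):
;   int smax(int *a, long n, int start) {
;     for (long i = 0; i < n; i++)
;       start = a[i] > start ? a[i] : start;
;     return start;
;   }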
define i32 @smax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-LABEL: @smax(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vp.reduce.smax.nxv4i32(i32 -2147483648, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMAX:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[CMP_I:%.*]] = icmp sgt i32 [[TMP17]], [[RDX]]
; IF-EVL-NEXT:    [[SMAX]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[SMAX_LCSSA:%.*]] = phi i32 [ [[SMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[SMAX_LCSSA]]
;
; NO-VP-LABEL: @smax(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.smax.i32(i32 [[TMP9]], i32 [[VEC_PHI]])
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SMAX:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[CMP_I:%.*]] = icmp sgt i32 [[TMP11]], [[RDX]]
; NO-VP-NEXT:    [[SMAX]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[SMAX_LCSSA:%.*]] = phi i32 [ [[SMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[SMAX_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %smax, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp.i = icmp sgt i32 %0, %rdx
  %smax = select i1 %cmp.i, i32 %0, i32 %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %smax
}

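; C equivalent of the loop below (a reference sketch, assuming n >= 1; the
; comparison is unsigned even though the values are stored as int):
;   int umin(int *a, long n, int start) {
;     for (long i = 0; i < n; i++)
;       start = (unsigned)a[i] < (unsigned)start ? a[i] : start;
;     return start;
;   }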
803define i32 @umin(ptr %a, i64 %n, i32 %start) {
804; IF-EVL-LABEL: @umin(
805; IF-EVL-NEXT:  entry:
806; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
807; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
808; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
809; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
810; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
811; IF-EVL:       vector.ph:
812; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
813; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
814; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
815; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
816; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
817; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
818; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
819; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
820; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
821; IF-EVL:       vector.body:
822; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
823; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
824; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
825; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
826; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
827; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
828; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
829; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umin.nxv4i32(i32 -1, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[CMP_I:%.*]] = icmp ult i32 [[TMP17]], [[RDX]]
; IF-EVL-NEXT:    [[UMIN]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[UMIN_LCSSA:%.*]] = phi i32 [ [[UMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[UMIN_LCSSA]]
;
; NO-VP-LABEL: @umin(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umin.i32(i32 [[TMP9]], i32 [[VEC_PHI]])
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMIN:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[CMP_I:%.*]] = icmp ult i32 [[TMP11]], [[RDX]]
; NO-VP-NEXT:    [[UMIN]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[UMIN_LCSSA:%.*]] = phi i32 [ [[UMIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[UMIN_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %umin, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp.i = icmp ult i32 %0, %rdx
  %umin = select i1 %cmp.i, i32 %0, i32 %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %umin
}
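; In-loop umax reduction: IF-EVL reduces each step with @llvm.vp.reduce.umax, NO-VP with @llvm.vector.reduce.umax plus a scalar epilogue.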
define i32 @umax(ptr %a, i64 %n, i32 %start) {
; IF-EVL-LABEL: @umax(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vp.reduce.umax.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP14]], i32 [[VEC_PHI]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMAX:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[CMP_I:%.*]] = icmp ugt i32 [[TMP17]], [[RDX]]
; IF-EVL-NEXT:    [[UMAX]] = select i1 [[CMP_I]], i32 [[TMP17]], i32 [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[UMAX_LCSSA:%.*]] = phi i32 [ [[UMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[UMAX_LCSSA]]
;
; NO-VP-LABEL: @umax(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[RDX_MINMAX]] = call i32 @llvm.umax.i32(i32 [[TMP9]], i32 [[VEC_PHI]])
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[UMAX:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[CMP_I:%.*]] = icmp ugt i32 [[TMP11]], [[RDX]]
; NO-VP-NEXT:    [[UMAX]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[UMAX_LCSSA:%.*]] = phi i32 [ [[UMAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[UMAX_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %umax, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp.i = icmp ugt i32 %0, %rdx
  %umax = select i1 %cmp.i, i32 %0, i32 %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %umax
}
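; In-loop reassociated fadd reduction: IF-EVL uses @llvm.vp.reduce.fadd, NO-VP uses @llvm.vector.reduce.fadd.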
define float @fadd(ptr %a, i64 %n, float %start) {
; IF-EVL-LABEL: @fadd(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP15]] = fadd reassoc float [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[ADD]] = fadd reassoc float [[TMP18]], [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret float [[ADD_LCSSA]]
;
; NO-VP-LABEL: @fadd(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP10]] = fadd reassoc float [[TMP9]], [[VEC_PHI]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[ADD]] = fadd reassoc float [[TMP12]], [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret float [[ADD_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi float [ %start, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %add = fadd reassoc float %0, %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %add
}
; fmul reductions are not supported for scalable vectors; IF-EVL leaves the loop scalar and NO-VP falls back to fixed-width vectors.
define float @fmul(ptr %a, i64 %n, float %start) {
; IF-EVL-LABEL: @fmul(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[MUL]] = fmul reassoc float [[TMP0]], [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    ret float [[MUL_LCSSA]]
;
; NO-VP-LABEL: @fmul(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
; NO-VP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
; NO-VP-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
; NO-VP-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; NO-VP-NEXT:    [[TMP6:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP7]] = fmul reassoc float [[TMP6]], [[VEC_PHI]]
; NO-VP-NEXT:    [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD2]])
; NO-VP-NEXT:    [[TMP9]] = fmul reassoc float [[TMP8]], [[VEC_PHI1]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; NO-VP-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[BIN_RDX:%.*]] = fmul reassoc float [[TMP9]], [[TMP7]]
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[MUL]] = fmul reassoc float [[TMP11]], [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret float [[MUL_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi float [ %start, %entry ], [ %mul, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %mul = fmul reassoc float %0, %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %mul
}
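; In-loop fast-math fmin reduction: IF-EVL uses @llvm.vp.reduce.fmin, NO-VP uses @llvm.vector.reduce.fmin.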
define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-LABEL: @fmin(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmin.nxv4f32(float 0x47EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT:    [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP17]], [[RDX]]
; IF-EVL-NEXT:    [[MIN]] = select i1 [[CMP]], float [[TMP17]], float [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret float [[MIN_LCSSA]]
;
; NO-VP-LABEL: @fmin(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP9]], [[VEC_PHI]]
; NO-VP-NEXT:    [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP9]], float [[VEC_PHI]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[CMP:%.*]] = fcmp fast olt float [[TMP11]], [[RDX]]
; NO-VP-NEXT:    [[MIN]] = select i1 [[CMP]], float [[TMP11]], float [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret float [[MIN_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi float [ %start, %entry ], [ %min, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %cmp = fcmp fast olt float %0, %rdx
  %min = select i1 %cmp, float %0, float %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %min
}
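; In-loop fast-math fmax reduction: IF-EVL uses @llvm.vp.reduce.fmax, NO-VP uses @llvm.vector.reduce.fmax.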
define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-LABEL: @fmax(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call fast float @llvm.vp.reduce.fmax.nxv4f32(float 0xC7EFFFFFE0000000, <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP14]], [[VEC_PHI]]
; IF-EVL-NEXT:    [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP14]], float [[VEC_PHI]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[CMP:%.*]] = fcmp fast ogt float [[TMP17]], [[RDX]]
; IF-EVL-NEXT:    [[MAX]] = select i1 [[CMP]], float [[TMP17]], float [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret float [[MAX_LCSSA]]
;
; NO-VP-LABEL: @fmax(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast ogt float [[TMP9]], [[VEC_PHI]]
; NO-VP-NEXT:    [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP9]], float [[VEC_PHI]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[CMP:%.*]] = fcmp fast ogt float [[TMP11]], [[RDX]]
; NO-VP-NEXT:    [[MAX]] = select i1 [[CMP]], float [[TMP11]], float [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret float [[MAX_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi float [ %start, %entry ], [ %max, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %cmp = fcmp fast ogt float %0, %rdx
  %max = select i1 %cmp, float %0, float %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %max
}
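; fminimum (NaN-propagating) reductions are not EVL-vectorized: IF-EVL keeps the scalar loop, while NO-VP falls back to fixed-width <8 x float> vectors.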
define float @fminimum(ptr %a, i64 %n, float %start) {
; IF-EVL-LABEL: @fminimum(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]])
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    ret float [[MIN_LCSSA]]
;
; NO-VP-LABEL: @fminimum(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0
; NO-VP-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
; NO-VP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
; NO-VP-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
; NO-VP-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
; NO-VP-NEXT:    [[TMP6]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP7]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]])
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; NO-VP-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[RDX_MINMAX:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP6]], <8 x float> [[TMP7]])
; NO-VP-NEXT:    [[TMP9:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[RDX_MINMAX]])
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP10]])
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret float [[MIN_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi float [ %start, %entry ], [ %min, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %min = tail call float @llvm.minimum.f32(float %rdx, float %0)
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %min
}
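; Likewise, fmaximum reductions are not EVL-vectorized: IF-EVL keeps the scalar loop, NO-VP uses fixed-width <8 x float> vectors.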
define float @fmaximum(ptr %a, i64 %n, float %start) {
; IF-EVL-LABEL: @fmaximum(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]])
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    ret float [[MAX_LCSSA]]
;
; NO-VP-LABEL: @fmaximum(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[START:%.*]], i64 0
; NO-VP-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <8 x float> [[MINMAX_IDENT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP0]]
; NO-VP-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
; NO-VP-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
; NO-VP-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
; NO-VP-NEXT:    [[TMP6]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]])
; NO-VP-NEXT:    [[TMP7]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]])
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; NO-VP-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[RDX_MINMAX:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP6]], <8 x float> [[TMP7]])
; NO-VP-NEXT:    [[TMP9:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[RDX_MINMAX]])
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP10]])
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret float [[MAX_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi float [ %start, %entry ], [ %max, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %max = tail call float @llvm.maximum.f32(float %rdx, float %0)
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %max
}
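; In-loop fmuladd reduction: both sides expand it to an fmul followed by an in-loop fadd reduction (@llvm.vp.reduce.fadd under IF-EVL, @llvm.vector.reduce.fadd under NO-VP).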
1647define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
1648; IF-EVL-LABEL: @fmuladd(
1649; IF-EVL-NEXT:  entry:
1650; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
1651; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
1652; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
1653; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
1654; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1655; IF-EVL:       vector.ph:
1656; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
1657; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
1658; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
1659; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
1660; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
1661; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1662; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
1663; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
1664; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
1665; IF-EVL:       vector.body:
1666; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1667; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
1668; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
1669; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
1670; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
1671; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
1672; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP11]]
1673; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
1674; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
1675; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]]
1676; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
1677; IF-EVL-NEXT:    [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = fmul reassoc <vscale x 4 x float> [[VP_OP_LOAD]], [[VP_OP_LOAD1]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = call reassoc float @llvm.vp.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP18]] = fadd reassoc float [[TMP17]], [[VEC_PHI]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP18]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP22:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; IF-EVL-NEXT:    [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP21]], float [[TMP22]], float [[RDX]])
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret float [[MULADD_LCSSA]]
;
; NO-VP-LABEL: @fmuladd(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP9]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP10]], align 4
; NO-VP-NEXT:    [[TMP11:%.*]] = fmul reassoc <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; NO-VP-NEXT:    [[TMP12:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP11]])
; NO-VP-NEXT:    [[TMP13]] = fadd reassoc float [[TMP12]], [[VEC_PHI]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MULADD:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP15:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP16:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
; NO-VP-NEXT:    [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP15]], float [[TMP16]], float [[RDX]])
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret float [[MULADD_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi float [ %start, %entry ], [ %muladd, %for.body ]
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv
  %1 = load float, ptr %arrayidx2, align 4
  %muladd = tail call reassoc float @llvm.fmuladd.f32(float %0, float %1, float %rdx)
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %muladd
}

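; Any-of reduction keyed on a signed integer compare. A rough C equivalent
; of the scalar input below (a sketch for orientation, not part of the test
; input):
;
;   int anyof_icmp(int *a, long n, int start, int inv) {
;     int rdx = start;
;     for (long i = 0; i < n; ++i)
;       if (a[i] < 3)
;         rdx = inv;
;     return rdx;
;   }
;
; Under EVL tail folding the reduction is carried as an <vscale x 4 x i1>
; mask phi: each iteration ORs in the lane-wise compare results and keeps
; only the active lanes via llvm.vp.merge, and the middle block collapses
; the mask with llvm.vector.reduce.or before selecting between %inv and
; %start.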
define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-LABEL: @anyof_icmp(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 3)
; IF-EVL-NEXT:    [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
; IF-EVL-NEXT:    [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]])
; IF-EVL-NEXT:    [[TMP20:%.*]] = freeze i1 [[TMP19]]
; IF-EVL-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]]
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[TMP21]], 3
; IF-EVL-NEXT:    [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[ANYOF_LCSSA]]
;
; NO-VP-LABEL: @anyof_icmp(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3)
; NO-VP-NEXT:    [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP10]])
; NO-VP-NEXT:    [[TMP13:%.*]] = freeze i1 [[TMP12]]
; NO-VP-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]]
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[CMP_I:%.*]] = icmp slt i32 [[TMP14]], 3
; NO-VP-NEXT:    [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[ANYOF_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %anyof, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load i32, ptr %arrayidx, align 4
  %cmp.i = icmp slt i32 %0, 3
  %anyof = select i1 %cmp.i, i32 %inv, i32 %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %anyof
}

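; The same any-of pattern keyed on a fast-math float compare. Note that the
; scalar input below computes its address with an i32-typed GEP while
; loading a float, which is why the vector body mixes i32 and float GEPs.
; A rough C equivalent (a sketch, not part of the test input):
;
;   int anyof_fcmp(float *a, long n, int start, int inv) {
;     int rdx = start;
;     for (long i = 0; i < n; ++i)
;       if (a[i] < 3.0f)
;         rdx = inv;
;     return rdx;
;   }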
define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
; IF-EVL-LABEL: @anyof_fcmp(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N:%.*]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP5]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP9:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP9]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], splat (float 3.000000e+00)
; IF-EVL-NEXT:    [[TMP15:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP14]]
; IF-EVL-NEXT:    [[TMP16]] = call <vscale x 4 x i1> @llvm.vp.merge.nxv4i1(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP16]])
; IF-EVL-NEXT:    [[TMP20:%.*]] = freeze i1 [[TMP19]]
; IF-EVL-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP20]], i32 [[INV:%.*]], i32 [[START:%.*]]
; IF-EVL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[CMP_I:%.*]] = fcmp fast olt float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]]
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
; IF-EVL:       for.end:
; IF-EVL-NEXT:    [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
; IF-EVL-NEXT:    ret i32 [[ANYOF_LCSSA]]
;
; NO-VP-LABEL: @anyof_fcmp(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; NO-VP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; NO-VP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; NO-VP:       vector.ph:
; NO-VP-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; NO-VP-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; NO-VP-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; NO-VP-NEXT:    br label [[VECTOR_BODY:%.*]]
; NO-VP:       vector.body:
; NO-VP-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; NO-VP-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; NO-VP-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]]
; NO-VP-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0
; NO-VP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4
; NO-VP-NEXT:    [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
; NO-VP-NEXT:    [[TMP10]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]]
; NO-VP-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; NO-VP-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; NO-VP:       middle.block:
; NO-VP-NEXT:    [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP10]])
; NO-VP-NEXT:    [[TMP13:%.*]] = freeze i1 [[TMP12]]
; NO-VP-NEXT:    [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 [[INV:%.*]], i32 [[START:%.*]]
; NO-VP-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; NO-VP-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; NO-VP:       scalar.ph:
; NO-VP-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; NO-VP-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ANYOF:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP14:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[CMP_I:%.*]] = fcmp fast olt float [[TMP14]], 3.000000e+00
; NO-VP-NEXT:    [[ANYOF]] = select i1 [[CMP_I]], i32 [[INV]], i32 [[RDX]]
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; NO-VP:       for.end:
; NO-VP-NEXT:    [[ANYOF_LCSSA:%.*]] = phi i32 [ [[ANYOF]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
; NO-VP-NEXT:    ret i32 [[ANYOF_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %rdx = phi i32 [ %start, %entry ], [ %anyof, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %0 = load float, ptr %arrayidx, align 4
  %cmp.i = fcmp fast olt float %0, 3.0
  %anyof = select i1 %cmp.i, i32 %inv, i32 %rdx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret i32 %anyof
}

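; Intrinsic declarations shared by the reduction tests in this file.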
declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)
declare float @llvm.fmuladd.f32(float, float, float)

attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }

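; Loop metadata attached to each scalar input loop; !1 requests
; vectorization via llvm.loop.vectorize.enable.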
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}