; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=IF-EVL

; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=NO-VP

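; Test tail folding with explicit vector length (EVL) for loops whose bodies
; call intrinsics that have VP counterparts. In the IF-EVL run, each scalar
; intrinsic below (smax, smin, umax, umin, ctlz, cttz, lrint, llrint, abs)
; is expected to be widened to its @llvm.vp.* form, with the active vector
; length for each iteration produced by @llvm.experimental.get.vector.length.
; In the NO-VP run the loop is expected to remain scalar.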
define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-LABEL: define void @vp_smax(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[C3:%.*]] = ptrtoint ptr [[C]] to i64
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
; IF-EVL-NEXT:    [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT:    br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
; IF-EVL-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
; IF-EVL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP28:%.*]] = mul i64 [[TMP27]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP28]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.smax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load i32, ptr [[GEP3]], align 4
; IF-EVL-NEXT:    [[DOT:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP20]], i32 [[TMP21]])
; IF-EVL-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_smax(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
; NO-VP-NEXT:    [[DOT:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[TMP1]])
; NO-VP-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[DOT]], ptr [[GEP11]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
  %1 = load i32, ptr %gep3, align 4
  %. = tail call i32 @llvm.smax.i32(i32 %0, i32 %1)
  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %., ptr %gep11, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

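; Same pattern for llvm.smin: the call should be widened to @llvm.vp.smin.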
define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-LABEL: define void @vp_smin(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[C3:%.*]] = ptrtoint ptr [[C]] to i64
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
; IF-EVL-NEXT:    [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT:    br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
; IF-EVL-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
; IF-EVL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP28:%.*]] = mul i64 [[TMP27]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP28]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.smin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load i32, ptr [[GEP3]], align 4
; IF-EVL-NEXT:    [[DOT:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP20]], i32 [[TMP21]])
; IF-EVL-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_smin(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
; NO-VP-NEXT:    [[DOT:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[TMP1]])
; NO-VP-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[DOT]], ptr [[GEP11]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
  %1 = load i32, ptr %gep3, align 4
  %. = tail call i32 @llvm.smin.i32(i32 %0, i32 %1)
  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %., ptr %gep11, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

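; llvm.umax should be widened to @llvm.vp.umax.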
define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-LABEL: define void @vp_umax(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[C3:%.*]] = ptrtoint ptr [[C]] to i64
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
; IF-EVL-NEXT:    [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT:    br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
; IF-EVL-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
; IF-EVL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP28:%.*]] = mul i64 [[TMP27]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP28]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.umax.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load i32, ptr [[GEP3]], align 4
; IF-EVL-NEXT:    [[DOT:%.*]] = tail call i32 @llvm.umax.i32(i32 [[TMP20]], i32 [[TMP21]])
; IF-EVL-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_umax(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
; NO-VP-NEXT:    [[DOT:%.*]] = tail call i32 @llvm.umax.i32(i32 [[TMP0]], i32 [[TMP1]])
; NO-VP-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[DOT]], ptr [[GEP11]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
  %1 = load i32, ptr %gep3, align 4
  %. = tail call i32 @llvm.umax.i32(i32 %0, i32 %1)
  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %., ptr %gep11, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

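; llvm.umin should be widened to @llvm.vp.umin.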
define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-LABEL: define void @vp_umin(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[C3:%.*]] = ptrtoint ptr [[C]] to i64
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 13, i64 [[TMP2]])
; IF-EVL-NEXT:    [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT:    br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT:    [[TMP25:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP26:%.*]] = sub i64 [[A1]], [[C3]]
; IF-EVL-NEXT:    [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP26]], [[TMP25]]
; IF-EVL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
; IF-EVL-NEXT:    br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP27:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP28:%.*]] = mul i64 [[TMP27]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP28]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP28]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD5:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP29:%.*]] = call <vscale x 4 x i32> @llvm.vp.umin.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD5]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP29]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load i32, ptr [[GEP3]], align 4
; IF-EVL-NEXT:    [[DOT:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP20]], i32 [[TMP21]])
; IF-EVL-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[DOT]], ptr [[GEP11]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_umin(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP1:%.*]] = load i32, ptr [[GEP3]], align 4
; NO-VP-NEXT:    [[DOT:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
; NO-VP-NEXT:    [[GEP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[DOT]], ptr [[GEP11]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %gep3 = getelementptr inbounds i32, ptr %c, i64 %iv
  %1 = load i32, ptr %gep3, align 4
  %. = tail call i32 @llvm.umin.i32(i32 %0, i32 %1)
  %gep11 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %., ptr %gep11, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}


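; llvm.ctlz carries an immarg flag (poison on zero input); the i1 true
; operand should be preserved on the widened @llvm.vp.ctlz call.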
define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_ctlz(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP20:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP21:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP23]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP23]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.vp.ctlz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[TMP19:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP18]], i1 true)
; IF-EVL-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[TMP19]], ptr [[GEP3]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_ctlz(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[TMP1:%.*]] = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[TMP0]], i1 true)
; NO-VP-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[TMP1]], ptr [[GEP3]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %1 = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %0, i1 true)
  %gep3 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %1, ptr %gep3, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

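; llvm.cttz likewise: the i1 true immarg should be preserved on @llvm.vp.cttz.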
define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_cttz(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[TMP3]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP7]], [[TMP6]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 [[TMP9]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP10]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP9]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP14:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP14]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = call <vscale x 4 x i32> @llvm.vp.cttz.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP17]], ptr align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP13]])
; IF-EVL-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP13]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP22:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[TMP23:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP22]], i1 true)
; IF-EVL-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[TMP23]], ptr [[GEP3]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_cttz(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[TMP1:%.*]] = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[TMP0]], i1 true)
; NO-VP-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[TMP1]], ptr [[GEP3]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %1 = tail call range(i32 0, 33) i32 @llvm.cttz.i32(i32 %0, i1 true)
  %gep3 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %1, ptr %gep3, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

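; llvm.lrint is tested with its surrounding fpext/trunc casts; each of the
; three operations should be widened to a VP form (@llvm.vp.fpext,
; @llvm.vp.lrint, @llvm.vp.trunc) sharing the same EVL.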
define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_lrint(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 9, i64 [[TMP2]])
; IF-EVL-NEXT:    [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT:    br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP25]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP26]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP26]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.vp.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load float, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[CONV2:%.*]] = fpext float [[TMP20]] to double
; IF-EVL-NEXT:    [[TMP21:%.*]] = tail call i64 @llvm.lrint.i64.f64(double [[CONV2]])
; IF-EVL-NEXT:    [[CONV3:%.*]] = trunc i64 [[TMP21]] to i32
; IF-EVL-NEXT:    [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[CONV3]], ptr [[GEP5]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_lrint(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[CONV2:%.*]] = fpext float [[TMP0]] to double
; NO-VP-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.lrint.i64.f64(double [[CONV2]])
; NO-VP-NEXT:    [[CONV3:%.*]] = trunc i64 [[TMP1]] to i32
; NO-VP-NEXT:    [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[CONV3]], ptr [[GEP5]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds float, ptr %b, i64 %iv
  %0 = load float, ptr %gep, align 4
  %conv2 = fpext float %0 to double
  %1 = tail call i64 @llvm.lrint.i64.f64(double %conv2)
  %conv3 = trunc i64 %1 to i32
  %gep5 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %conv3, ptr %gep5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

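; llvm.llrint: same fpext/llrint/trunc chain, widened to @llvm.vp.llrint.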
define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_llrint(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 9, i64 [[TMP2]])
; IF-EVL-NEXT:    [[TMP22:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT:    br i1 [[TMP22]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP24]], [[TMP23]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP25]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP26]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP26]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load float, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[CONV2:%.*]] = fpext float [[TMP20]] to double
; IF-EVL-NEXT:    [[TMP21:%.*]] = tail call i64 @llvm.llrint.i64.f64(double [[CONV2]])
; IF-EVL-NEXT:    [[CONV3:%.*]] = trunc i64 [[TMP21]] to i32
; IF-EVL-NEXT:    [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[CONV3]], ptr [[GEP5]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_llrint(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[CONV2:%.*]] = fpext float [[TMP0]] to double
; NO-VP-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.llrint.i64.f64(double [[CONV2]])
; NO-VP-NEXT:    [[CONV3:%.*]] = trunc i64 [[TMP1]] to i32
; NO-VP-NEXT:    [[GEP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[CONV3]], ptr [[GEP5]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds float, ptr %b, i64 %iv
  %0 = load float, ptr %gep, align 4
  %conv2 = fpext float %0 to double
  %1 = tail call i64 @llvm.llrint.i64.f64(double %conv2)
  %conv3 = trunc i64 %1 to i32
  %gep5 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %conv3, ptr %gep5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

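; llvm.abs carries an immarg flag (poison on INT_MIN input); the i1 true
; operand should be preserved on the widened @llvm.vp.abs call.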
define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @vp_abs(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    [[B2:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    [[A1:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[TMP0:%.*]] = sub i64 -1, [[N]]
; IF-EVL-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP2]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp ult i64 [[TMP0]], [[TMP3]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; IF-EVL-NEXT:    [[TMP20:%.*]] = mul i64 [[TMP5]], 4
; IF-EVL-NEXT:    [[TMP21:%.*]] = sub i64 [[A1]], [[B2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP21]], [[TMP20]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 4
; IF-EVL-NEXT:    [[TMP6:%.*]] = sub i64 [[TMP23]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP6]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP23]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP10:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP24:%.*]] = call <vscale x 4 x i32> @llvm.vp.abs.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], i1 true, <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP24]], ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = load i32, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[COND:%.*]] = tail call i32 @llvm.abs.i32(i32 [[TMP18]], i1 true)
; IF-EVL-NEXT:    [[GEP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store i32 [[COND]], ptr [[GEP9]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @vp_abs(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i32, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[COND:%.*]] = tail call i32 @llvm.abs.i32(i32 [[TMP0]], i1 true)
; NO-VP-NEXT:    [[GEP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store i32 [[COND]], ptr [[GEP9]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds i32, ptr %b, i64 %iv
  %0 = load i32, ptr %gep, align 4
  %cond = tail call i32 @llvm.abs.i32(i32 %0, i1 true)
  %gep9 = getelementptr inbounds i32, ptr %a, i64 %iv
  store i32 %cond, ptr %gep9, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; There's no @llvm.vp.log10 intrinsic, so the call shouldn't be transformed;
; both IF-EVL and NO-VP are expected to leave the loop scalar.
define void @log10(ptr %a, ptr %b, i64 %N) {
; IF-EVL-LABEL: define void @log10(
; IF-EVL-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[ENTRY:.*]]:
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; IF-EVL-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; IF-EVL-NEXT:    [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
; IF-EVL-NEXT:    [[COND:%.*]] = tail call float @llvm.log10.f32(float [[TMP0]])
; IF-EVL-NEXT:    [[GEP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    store float [[COND]], ptr [[GEP9]], align 4
; IF-EVL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; IF-EVL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; IF-EVL-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; IF-EVL:       [[EXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @log10(
; NO-VP-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[ENTRY:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; NO-VP-NEXT:    [[GEP:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[GEP]], align 4
; NO-VP-NEXT:    [[COND:%.*]] = tail call float @llvm.log10.f32(float [[TMP0]])
; NO-VP-NEXT:    [[GEP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT:    store float [[COND]], ptr [[GEP9]], align 4
; NO-VP-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT:    br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
; NO-VP:       [[EXIT]]:
; NO-VP-NEXT:    ret void
;

entry:
  br label %loop

loop:
  %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
  %gep = getelementptr inbounds float, ptr %b, i64 %iv
  %0 = load float, ptr %gep, align 4
  %cond = tail call float @llvm.log10.f32(float %0)
  %gep9 = getelementptr inbounds float, ptr %a, i64 %iv
  store float %cond, ptr %gep9, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

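; Declarations for the intrinsics exercised by the tests above.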
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.ctlz.i32(i32, i1 immarg)
declare i32 @llvm.cttz.i32(i32, i1 immarg)
declare i64 @llvm.lrint.i64.f64(double)
declare i64 @llvm.llrint.i64.f64(double)
declare i32 @llvm.abs.i32(i32, i1 immarg)