; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=IF-EVL

; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s --check-prefix=NO-VP


define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_and(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = and i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_and(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = and i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = and i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
106
define void @test_or(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_or(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.or.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = or i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_or(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = or i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = or i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
200
define void @test_xor(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_xor(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.xor.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = xor i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_xor(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = xor i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = xor i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
294
define void @test_shl(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_shl(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.shl.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = shl i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_shl(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = shl i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = shl i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
388
define void @test_lshr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_lshr(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.lshr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = lshr i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_lshr(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = lshr i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = lshr i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
482
; Scalar loop computes b[i] = a[i] ashr 1 for i in [0, 100). With EVL tail
; folding (IF-EVL) this must vectorize to @llvm.vp.ashr using an all-true mask
; and the active vector length from @llvm.experimental.get.vector.length;
; with tail folding disabled and an epilogue forbidden (NO-VP) the loop must
; stay scalar. CHECK lines below are autogenerated — do not edit by hand.
define void @test_ashr(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_ashr(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.ashr.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = ashr i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_ashr(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = ashr i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len counts up 0..99; trip count is a constant 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = ashr i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
576
; Scalar loop computes b[i] = a[i] + 1 for i in [0, 100). With EVL tail
; folding (IF-EVL) this must vectorize to @llvm.vp.add with an all-true mask
; and the EVL from @llvm.experimental.get.vector.length; NO-VP must remain a
; scalar loop. CHECK lines below are autogenerated — do not edit by hand.
define void @test_add(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_add(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.add.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = add i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_add(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = add i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len counts up 0..99; trip count is a constant 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = add i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
670
; Scalar loop computes b[i] = a[i] - 1 for i in [0, 100). With EVL tail
; folding (IF-EVL) this must vectorize to @llvm.vp.sub with an all-true mask
; and the EVL from @llvm.experimental.get.vector.length; NO-VP must remain a
; scalar loop. CHECK lines below are autogenerated — do not edit by hand.
define void @test_sub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_sub(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.sub.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 1), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = sub i8 [[TMP20]], 1
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_sub(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = sub i8 [[TMP0]], 1
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len counts up 0..99; trip count is a constant 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = sub i8 %0, 1
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
764
; Scalar loop computes b[i] = a[i] * 3 for i in [0, 100). With EVL tail
; folding (IF-EVL) this must vectorize to @llvm.vp.mul with an all-true mask
; and the EVL from @llvm.experimental.get.vector.length; NO-VP must remain a
; scalar loop. CHECK lines below are autogenerated — do not edit by hand.
define void @test_mul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_mul(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.mul.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = mul i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_mul(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = mul i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len counts up 0..99; trip count is a constant 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = mul i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
858
; Scalar loop computes b[i] = a[i] sdiv 3 for i in [0, 100). With EVL tail
; folding (IF-EVL) this must vectorize to @llvm.vp.sdiv with an all-true mask
; and the EVL from @llvm.experimental.get.vector.length (the vp form makes
; the division safe to speculate across the folded tail); NO-VP must remain a
; scalar loop. CHECK lines below are autogenerated — do not edit by hand.
define void @test_sdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_sdiv(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.sdiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = sdiv i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_sdiv(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = sdiv i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len counts up 0..99; trip count is a constant 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = sdiv i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
952
; Scalar loop computes b[i] = a[i] udiv 3 for i in [0, 100). With EVL tail
; folding (IF-EVL) this must vectorize to @llvm.vp.udiv with an all-true mask
; and the EVL from @llvm.experimental.get.vector.length (the vp form makes
; the division safe to speculate across the folded tail); NO-VP must remain a
; scalar loop. CHECK lines below are autogenerated — do not edit by hand.
define void @test_udiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_udiv(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.udiv.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = udiv i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_udiv(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = udiv i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  ; %len counts up 0..99; trip count is a constant 100.
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = udiv i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
1046
; Scalable i8 srem-by-constant (3). With EVL tail folding the loop is expected
; to vectorize via @llvm.vp.srem under @llvm.experimental.get.vector.length,
; guarded by a pointer-difference alias check; without EVL the loop stays scalar.
define void @test_srem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_srem(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.srem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = srem i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_srem(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = srem i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = srem i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
; Scalable i8 urem-by-constant (3). With EVL tail folding the loop is expected
; to vectorize via @llvm.vp.urem under @llvm.experimental.get.vector.length,
; guarded by a pointer-difference alias check; without EVL the loop stays scalar.
define void @test_urem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_urem(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 16
; IF-EVL-NEXT:    [[TMP7:%.*]] = sub i64 [[TMP6]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP6]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 16
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
; IF-EVL-NEXT:    [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call <vscale x 16 x i8> @llvm.vp.urem.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> splat (i8 3), <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_OP]], ptr align 1 [[TMP17]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP11]])
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP18]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; IF-EVL-NEXT:    [[TMP:%.*]] = urem i8 [[TMP20]], 3
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_urem(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; NO-VP-NEXT:    [[TMP:%.*]] = urem i8 [[TMP0]], 3
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %len
  %0 = load i8, ptr %arrayidx, align 1
  %tmp = urem i8 %0, 3
  %arrayidx1 = getelementptr inbounds i8, ptr %b, i64 %len
  store i8 %tmp, ptr %arrayidx1, align 1
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}

; Floating point tests

; Scalable float fadd with a fast-math constant (3.0). With EVL tail folding the
; loop is expected to vectorize via @llvm.vp.fadd under
; @llvm.experimental.get.vector.length, guarded by a pointer-difference alias
; check; without EVL the loop stays scalar.
define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_fadd(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fadd fast float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP29:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fadd(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fadd fast float %0, 3.000000e+00
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
; Scalable float fsub with a fast-math constant (3.0). With EVL tail folding the
; loop is expected to vectorize via @llvm.vp.fsub under
; @llvm.experimental.get.vector.length, guarded by a pointer-difference alias
; check; without EVL the loop stays scalar.
define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_fsub(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fsub fast float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fsub(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fsub fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fsub fast float %0, 3.000000e+00
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
; Scalable float fmul with a fast-math constant (3.0). With EVL tail folding the
; loop is expected to vectorize via @llvm.vp.fmul under
; @llvm.experimental.get.vector.length, guarded by a pointer-difference alias
; check; without EVL the loop stays scalar.
define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_fmul(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fmul fast float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP33:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fmul(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fmul fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fmul fast float %0, 3.000000e+00
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
; b[i] = a[i] / 3.0 with a fast-math fdiv over a fixed trip count of 100.
; With EVL tail folding the division is widened to @llvm.vp.fdiv guarded by
; the active vector length; the second RUN configuration (tail folding off,
; epilogue vectorization forbidden) leaves the loop scalar in the checked
; output. CHECK lines below are autogenerated — regenerate with
; utils/update_test_checks.py rather than editing by hand.
define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_fdiv(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fdiv fast float [[TMP21]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP35:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fdiv(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fdiv fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]   ; induction: 0 .. 99
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fdiv fast float %0, 3.000000e+00                   ; operation under test
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100                             ; constant trip count of 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
1616
; b[i] = frem(a[i], 3.0) over a fixed trip count of 100. Unlike the fdiv
; case above, the checked output for BOTH run configurations is the plain
; scalar loop, i.e. the vectorizer currently leaves frem scalar here.
; CHECK lines below are autogenerated — regenerate with
; utils/update_test_checks.py rather than editing by hand.
define void @test_frem(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_frem(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = frem fast float [[TMP0]], 3.000000e+00
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_frem(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = frem fast float [[TMP0]], 3.000000e+00
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]   ; induction: 0 .. 99
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = frem fast float %0, 3.000000e+00                   ; operation under test
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100                             ; constant trip count of 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
1669
; b[i] = -a[i] (fast-math fneg) over a fixed trip count of 100. With EVL
; tail folding the negation is widened to @llvm.vp.fneg guarded by the
; active vector length; the second RUN configuration (tail folding off,
; epilogue vectorization forbidden) leaves the loop scalar in the checked
; output. CHECK lines below are autogenerated — regenerate with
; utils/update_test_checks.py rather than editing by hand.
define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-LABEL: define void @test_fneg(
; IF-EVL-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; IF-EVL-NEXT:  [[LOOP_PREHEADER:.*]]:
; IF-EVL-NEXT:    [[A2:%.*]] = ptrtoint ptr [[A]] to i64
; IF-EVL-NEXT:    [[B1:%.*]] = ptrtoint ptr [[B]] to i64
; IF-EVL-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; IF-EVL:       [[VECTOR_MEMCHECK]]:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; IF-EVL-NEXT:    [[TMP3:%.*]] = sub i64 [[B1]], [[A2]]
; IF-EVL-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP2]]
; IF-EVL-NEXT:    br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; IF-EVL:       [[VECTOR_PH]]:
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
; IF-EVL-NEXT:    [[TMP8:%.*]] = sub i64 [[TMP7]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 100, [[TMP8]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; IF-EVL-NEXT:    br label %[[VECTOR_BODY:.*]]
; IF-EVL:       [[VECTOR_BODY]]:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 100, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true)
; IF-EVL-NEXT:    [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP12]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; IF-EVL:       [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT:    br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL:       [[SCALAR_PH]]:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_MEMCHECK]] ], [ 0, %[[LOOP_PREHEADER]] ]
; IF-EVL-NEXT:    br label %[[LOOP:.*]]
; IF-EVL:       [[LOOP]]:
; IF-EVL-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
; IF-EVL-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; IF-EVL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; IF-EVL-NEXT:    [[TMP:%.*]] = fneg fast float [[TMP21]]
; IF-EVL-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; IF-EVL-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; IF-EVL-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; IF-EVL-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP37:![0-9]+]]
; IF-EVL:       [[FINISH_LOOPEXIT]]:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: define void @test_fneg(
; NO-VP-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT:  [[LOOP_PREHEADER:.*]]:
; NO-VP-NEXT:    br label %[[LOOP:.*]]
; NO-VP:       [[LOOP]]:
; NO-VP-NEXT:    [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
; NO-VP-NEXT:    [[DEC]] = add nsw i64 [[LEN]], 1
; NO-VP-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[LEN]]
; NO-VP-NEXT:    [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT:    [[TMP:%.*]] = fneg fast float [[TMP0]]
; NO-VP-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[LEN]]
; NO-VP-NEXT:    store float [[TMP]], ptr [[ARRAYIDX1]], align 4
; NO-VP-NEXT:    [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
; NO-VP-NEXT:    br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT:.*]], label %[[LOOP]]
; NO-VP:       [[FINISH_LOOPEXIT]]:
; NO-VP-NEXT:    ret void
;
loop.preheader:
  br label %loop

loop:
  %len = phi i64 [ %dec, %loop ], [ 0, %loop.preheader ]   ; induction: 0 .. 99
  %dec = add nsw i64 %len, 1
  %arrayidx = getelementptr inbounds float, ptr %a, i64 %len
  %0 = load float, ptr %arrayidx, align 4
  %tmp = fneg fast float %0                                 ; operation under test
  %arrayidx1 = getelementptr inbounds float, ptr %b, i64 %len
  store float %tmp, ptr %arrayidx1, align 4
  %.not = icmp eq i64 %dec, 100                             ; constant trip count of 100
  br i1 %.not, label %finish.loopexit, label %loop

finish.loopexit:
  ret void
}
1764;.
1765; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1766; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1767; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1768; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
1769; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1770; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
1771; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
1772; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
1773; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1774; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
1775; IF-EVL: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
1776; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
1777; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
1778; IF-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
1779; IF-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
1780; IF-EVL: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]]}
1781; IF-EVL: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
1782; IF-EVL: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
1783; IF-EVL: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
1784; IF-EVL: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]}
1785; IF-EVL: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
1786; IF-EVL: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]]}
1787; IF-EVL: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
1788; IF-EVL: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]]}
1789; IF-EVL: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
1790; IF-EVL: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]]}
1791; IF-EVL: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
1792; IF-EVL: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]]}
1793; IF-EVL: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
1794; IF-EVL: [[LOOP29]] = distinct !{[[LOOP29]], [[META1]]}
1795; IF-EVL: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]}
1796; IF-EVL: [[LOOP31]] = distinct !{[[LOOP31]], [[META1]]}
1797; IF-EVL: [[LOOP32]] = distinct !{[[LOOP32]], [[META1]], [[META2]]}
1798; IF-EVL: [[LOOP33]] = distinct !{[[LOOP33]], [[META1]]}
1799; IF-EVL: [[LOOP34]] = distinct !{[[LOOP34]], [[META1]], [[META2]]}
1800; IF-EVL: [[LOOP35]] = distinct !{[[LOOP35]], [[META1]]}
1801; IF-EVL: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]}
1802; IF-EVL: [[LOOP37]] = distinct !{[[LOOP37]], [[META1]]}
1803;.
1804