; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=data-with-evl \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL

; RUN: opt -passes=loop-vectorize \
; RUN: -force-tail-folding-style=none \
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP

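; Simple reverse copy: a decrementing induction stores %ptr[i] into %ptr2[i].
; With EVL tail folding (IF-EVL) the accesses are expected to become a
; vp.load/vp.store through a reversed address, with
; llvm.experimental.vp.reverse applied to both the loaded and the stored data.
; The NO-VP run requires predication but disables tail folding, so the loop
; is expected to stay scalar.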
define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %ptr2) {
; IF-EVL-LABEL: @reverse_load_store(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; IF-EVL-NEXT:    [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]]
; IF-EVL-NEXT:    [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], -1
; IF-EVL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT:    [[TMP9:%.*]] = mul i64 0, [[TMP18]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP18]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[TMP8]], i64 [[TMP9]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT:    [[TMP14:%.*]] = mul i64 0, [[TMP19]]
; IF-EVL-NEXT:    [[TMP15:%.*]] = sub i64 1, [[TMP19]]
; IF-EVL-NEXT:    [[TMP22:%.*]] = getelementptr i32, ptr [[TMP13]], i64 [[TMP14]]
; IF-EVL-NEXT:    [[TMP17:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP15]]
; IF-EVL-NEXT:    [[VP_REVERSE3:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE3]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP20]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[STARTVAL]], [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[ADD_PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; IF-EVL-NEXT:    [[ADD]] = add i64 [[ADD_PHI]], -1
; IF-EVL-NEXT:    [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD]]
; IF-EVL-NEXT:    [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4
; IF-EVL-NEXT:    [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2]], i64 [[ADD]]
; IF-EVL-NEXT:    store i32 [[TMP]], ptr [[GEPS]], align 4
; IF-EVL-NEXT:    [[INC]] = add i32 [[I]], 1
; IF-EVL-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
; IF-EVL-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL:       loopend:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @reverse_load_store(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[ADD_PHI:%.*]] = phi i64 [ [[STARTVAL:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; NO-VP-NEXT:    [[ADD]] = add i64 [[ADD_PHI]], -1
; NO-VP-NEXT:    [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[ADD]]
; NO-VP-NEXT:    [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4
; NO-VP-NEXT:    [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[ADD]]
; NO-VP-NEXT:    store i32 [[TMP]], ptr [[GEPS]], align 4
; NO-VP-NEXT:    [[INC]] = add i32 [[I]], 1
; NO-VP-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
; NO-VP-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND:%.*]]
; NO-VP:       loopend:
; NO-VP-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %add.phi = phi i64 [ %startval, %entry ], [ %add, %for.body ]
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %add = add i64 %add.phi, -1
  %gepl = getelementptr inbounds i32, ptr %ptr, i64 %add
  %tmp = load i32, ptr %gepl, align 4
  %geps = getelementptr inbounds i32, ptr %ptr2, i64 %add
  store i32 %tmp, ptr %geps, align 4
  %inc = add i32 %i, 1
  %exitcond = icmp ne i32 %inc, 1024
  br i1 %exitcond, label %for.body, label %loopend

loopend:
  ret void
}
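; Same reverse copy, but the load/store pair is guarded by a compare on a
; forward (unit-stride) load. The header mask combined with the compare result
; must itself be reversed (llvm.experimental.vp.reverse.nxv4i1) before it can
; predicate the reverse vp.load and vp.store.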
define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noalias %ptr1, ptr noalias %ptr2) {
; IF-EVL-LABEL: @reverse_load_store_masked(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
; IF-EVL-NEXT:    [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]]
; IF-EVL-NEXT:    [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; IF-EVL-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
; IF-EVL-NEXT:    [[OFFSET_IDX3:%.*]] = trunc i64 [[EVL_BASED_IV]] to i32
; IF-EVL-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX3]], 0
; IF-EVL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[EVL_BASED_IV]], i64 0
; IF-EVL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; IF-EVL-NEXT:    [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; IF-EVL-NEXT:    [[TMP9:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
; IF-EVL-NEXT:    [[VEC_IV:%.*]] = add <vscale x 4 x i64> [[BROADCAST_SPLAT]], [[TMP9]]
; IF-EVL-NEXT:    [[TMP10:%.*]] = icmp ule <vscale x 4 x i64> [[VEC_IV]], splat (i64 1023)
; IF-EVL-NEXT:    [[TMP11:%.*]] = add i64 [[TMP6]], -1
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP7]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], splat (i32 100)
; IF-EVL-NEXT:    [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP26]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP26]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP17]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i64 [[TMP18]]
; IF-EVL-NEXT:    [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP5]])
; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT:    [[TMP22:%.*]] = mul i64 0, [[TMP27]]
; IF-EVL-NEXT:    [[TMP23:%.*]] = sub i64 1, [[TMP27]]
; IF-EVL-NEXT:    [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP22]]
; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP23]]
; IF-EVL-NEXT:    [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP25]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP5]])
; IF-EVL-NEXT:    [[TMP28:%.*]] = zext i32 [[TMP5]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP28]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[STARTVAL]], [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; IF-EVL-NEXT:    br label [[FOR_BODY:%.*]]
; IF-EVL:       for.body:
; IF-EVL-NEXT:    [[ADD_PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
; IF-EVL-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_INC]] ]
; IF-EVL-NEXT:    [[ADD]] = add i64 [[ADD_PHI]], -1
; IF-EVL-NEXT:    [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[I]]
; IF-EVL-NEXT:    [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4
; IF-EVL-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP]], 100
; IF-EVL-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; IF-EVL:       if.then:
; IF-EVL-NEXT:    [[GEPL1:%.*]] = getelementptr inbounds i32, ptr [[PTR1]], i64 [[ADD]]
; IF-EVL-NEXT:    [[V:%.*]] = load i32, ptr [[GEPL1]], align 4
; IF-EVL-NEXT:    [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2]], i64 [[ADD]]
; IF-EVL-NEXT:    store i32 [[V]], ptr [[GEPS]], align 4
; IF-EVL-NEXT:    br label [[FOR_INC]]
; IF-EVL:       for.inc:
; IF-EVL-NEXT:    [[INC]] = add i32 [[I]], 1
; IF-EVL-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
; IF-EVL-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND]], !llvm.loop [[LOOP5:![0-9]+]]
; IF-EVL:       loopend:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @reverse_load_store_masked(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br label [[FOR_BODY:%.*]]
; NO-VP:       for.body:
; NO-VP-NEXT:    [[ADD_PHI:%.*]] = phi i64 [ [[STARTVAL:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
; NO-VP-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ]
; NO-VP-NEXT:    [[ADD]] = add i64 [[ADD_PHI]], -1
; NO-VP-NEXT:    [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I]]
; NO-VP-NEXT:    [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4
; NO-VP-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP]], 100
; NO-VP-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; NO-VP:       if.then:
; NO-VP-NEXT:    [[GEPL1:%.*]] = getelementptr inbounds i32, ptr [[PTR1:%.*]], i64 [[ADD]]
; NO-VP-NEXT:    [[V:%.*]] = load i32, ptr [[GEPL1]], align 4
; NO-VP-NEXT:    [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[ADD]]
; NO-VP-NEXT:    store i32 [[V]], ptr [[GEPS]], align 4
; NO-VP-NEXT:    br label [[FOR_INC]]
; NO-VP:       for.inc:
; NO-VP-NEXT:    [[INC]] = add i32 [[I]], 1
; NO-VP-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024
; NO-VP-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND:%.*]]
; NO-VP:       loopend:
; NO-VP-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %add.phi = phi i64 [ %startval, %entry ], [ %add, %for.inc ]
  %i = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %add = add i64 %add.phi, -1
  %gepl = getelementptr inbounds i32, ptr %ptr, i32 %i
  %tmp = load i32, ptr %gepl, align 4
  %cmp1 = icmp slt i32 %tmp, 100
  br i1 %cmp1, label %if.then, label %for.inc

if.then:
  %gepl1 = getelementptr inbounds i32, ptr %ptr1, i64 %add
  %v = load i32, ptr %gepl1, align 4
  %geps = getelementptr inbounds i32, ptr %ptr2, i64 %add
  store i32 %v, ptr %geps, align 4
  br label %for.inc

for.inc:
  %inc = add i32 %i, 1
  %exitcond = icmp ne i32 %inc, 1024
  br i1 %exitcond, label %for.body, label %loopend

loopend:
  ret void
}

; From a miscompile originally reported at
; https://github.com/llvm/llvm-project/issues/122681

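; Several reversed accesses share one loop: a reverse load feeding a gather,
; plus two reverse stores. The checks pin down that every reversed address is
; derived from the per-iteration EVL (the zext of get.vector.length), not from
; VF * vscale.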
define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d) {
; IF-EVL-LABEL: @multiple_reverse_vector_pointer(
; IF-EVL-NEXT:  entry:
; IF-EVL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; IF-EVL:       vector.ph:
; IF-EVL-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT:    [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT:    [[N_RND_UP:%.*]] = add i64 1025, [[TMP2]]
; IF-EVL-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 16
; IF-EVL-NEXT:    [[TMP5:%.*]] = sub i64 1024, [[N_VEC]]
; IF-EVL-NEXT:    br label [[VECTOR_BODY:%.*]]
; IF-EVL:       vector.body:
; IF-EVL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-NEXT:    [[AVL:%.*]] = sub i64 1025, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
; IF-EVL-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1024, [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
; IF-EVL-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT:    [[TMP10:%.*]] = mul i64 0, [[TMP9]]
; IF-EVL-NEXT:    [[TMP11:%.*]] = sub i64 1, [[TMP9]]
; IF-EVL-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP10]]
; IF-EVL-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP11]]
; IF-EVL-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP13]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[VP_REVERSE:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[B:%.*]], <vscale x 16 x i8> [[VP_REVERSE]]
; IF-EVL-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 16 x i8> @llvm.vp.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> align 1 [[TMP14]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[TMP15:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT:    [[TMP16:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT:    [[TMP17:%.*]] = mul i64 0, [[TMP16]]
; IF-EVL-NEXT:    [[TMP18:%.*]] = sub i64 1, [[TMP16]]
; IF-EVL-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP15]], i64 [[TMP17]]
; IF-EVL-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[TMP19]], i64 [[TMP18]]
; IF-EVL-NEXT:    [[VP_REVERSE1:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_REVERSE1]], ptr align 1 [[TMP20]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[TMP7]]
; IF-EVL-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT:    [[TMP23:%.*]] = mul i64 0, [[TMP22]]
; IF-EVL-NEXT:    [[TMP24:%.*]] = sub i64 1, [[TMP22]]
; IF-EVL-NEXT:    [[TMP25:%.*]] = getelementptr i8, ptr [[TMP21]], i64 [[TMP23]]
; IF-EVL-NEXT:    [[TMP26:%.*]] = getelementptr i8, ptr [[TMP25]], i64 [[TMP24]]
; IF-EVL-NEXT:    [[VP_REVERSE2:%.*]] = call <vscale x 16 x i8> @llvm.experimental.vp.reverse.nxv16i8(<vscale x 16 x i8> [[WIDE_MASKED_GATHER]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[VP_REVERSE2]], ptr align 1 [[TMP26]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT:    [[TMP27:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP27]], [[EVL_BASED_IV]]
; IF-EVL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; IF-EVL:       middle.block:
; IF-EVL-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL:       scalar.ph:
; IF-EVL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP5]], [[MIDDLE_BLOCK]] ], [ 1024, [[ENTRY:%.*]] ]
; IF-EVL-NEXT:    br label [[LOOP:%.*]]
; IF-EVL:       loop:
; IF-EVL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; IF-EVL-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A]], i64 [[IV]]
; IF-EVL-NEXT:    [[X:%.*]] = load i8, ptr [[GEP_A]], align 1
; IF-EVL-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B]], i8 [[X]]
; IF-EVL-NEXT:    [[Y:%.*]] = load i8, ptr [[GEP_B]], align 1
; IF-EVL-NEXT:    [[GEP_C:%.*]] = getelementptr i8, ptr [[C]], i64 [[IV]]
; IF-EVL-NEXT:    store i8 [[Y]], ptr [[GEP_C]], align 1
; IF-EVL-NEXT:    [[GEP_D:%.*]] = getelementptr i8, ptr [[D]], i64 [[IV]]
; IF-EVL-NEXT:    store i8 [[Y]], ptr [[GEP_D]], align 1
; IF-EVL-NEXT:    [[IV_NEXT]] = add i64 [[IV]], -1
; IF-EVL-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0
; IF-EVL-NEXT:    br i1 [[CMP_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
; IF-EVL:       exit:
; IF-EVL-NEXT:    ret void
;
; NO-VP-LABEL: @multiple_reverse_vector_pointer(
; NO-VP-NEXT:  entry:
; NO-VP-NEXT:    br label [[LOOP:%.*]]
; NO-VP:       loop:
; NO-VP-NEXT:    [[IV:%.*]] = phi i64 [ 1024, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; NO-VP-NEXT:    [[GEP_A:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[IV]]
; NO-VP-NEXT:    [[X:%.*]] = load i8, ptr [[GEP_A]], align 1
; NO-VP-NEXT:    [[GEP_B:%.*]] = getelementptr i8, ptr [[B:%.*]], i8 [[X]]
; NO-VP-NEXT:    [[Y:%.*]] = load i8, ptr [[GEP_B]], align 1
; NO-VP-NEXT:    [[GEP_C:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[IV]]
; NO-VP-NEXT:    store i8 [[Y]], ptr [[GEP_C]], align 1
; NO-VP-NEXT:    [[GEP_D:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[IV]]
; NO-VP-NEXT:    store i8 [[Y]], ptr [[GEP_D]], align 1
; NO-VP-NEXT:    [[IV_NEXT]] = add i64 [[IV]], -1
; NO-VP-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0
; NO-VP-NEXT:    br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]]
; NO-VP:       exit:
; NO-VP-NEXT:    ret void
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 1024, %entry ], [ %iv.next, %loop ]

  %gep.a = getelementptr i8, ptr %a, i64 %iv
  %x = load i8, ptr %gep.a

  %gep.b = getelementptr i8, ptr %b, i8 %x
  %y = load i8, ptr %gep.b

  %gep.c = getelementptr i8, ptr %c, i64 %iv
  store i8 %y, ptr %gep.c

  %gep.d = getelementptr i8, ptr %d, i64 %iv
  store i8 %y, ptr %gep.d

  %iv.next = add i64 %iv, -1
  %cmp.not = icmp eq i64 %iv, 0
  br i1 %cmp.not, label %exit, label %loop

exit:
  ret void
}