xref: /llvm-project/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll (revision 7f3428d3ed71d87a2088b77b6cab9f3d86544234)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -S | FileCheck --check-prefixes=CHECK,NOSTRIDED %s
3; RUN: opt < %s -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -lv-strided-pointer-ivs=true -laa-speculate-unit-stride=false -S | FileCheck --check-prefixes=CHECK,STRIDED %s
4
5
6define void @single_constant_stride_int_scaled(ptr %p) {
7; CHECK-LABEL: @single_constant_stride_int_scaled(
8; CHECK-NEXT:  entry:
9; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
10; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
11; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]]
12; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
13; CHECK:       vector.ph:
14; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
15; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
16; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
17; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
18; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
19; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[TMP5]]
20; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
21; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 4
22; CHECK-NEXT:    [[TMP8:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
23; CHECK-NEXT:    [[TMP10:%.*]] = mul <vscale x 4 x i64> [[TMP8]], splat (i64 1)
24; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP10]]
25; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 1, [[TMP7]]
26; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
27; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
28; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
29; CHECK:       vector.body:
30; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
31; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
32; CHECK-NEXT:    [[TMP14:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 8)
33; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP14]]
34; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
35; CHECK-NEXT:    [[TMP16:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
36; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP16]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true))
37; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
38; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
39; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
40; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
41; CHECK:       middle.block:
42; CHECK-NEXT:    br label [[SCALAR_PH]]
43; CHECK:       scalar.ph:
44; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
45; CHECK-NEXT:    br label [[LOOP:%.*]]
46; CHECK:       loop:
47; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
48; CHECK-NEXT:    [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], 8
49; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
50; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
51; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
52; CHECK-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
53; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
54; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
55; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
56; CHECK:       exit:
57; CHECK-NEXT:    ret void
58;
59entry:
60  br label %loop
61loop:
62  %i = phi i64 [0, %entry], [%nexti, %loop]
63
64  %offset = mul nsw nuw i64 %i, 8
65  %q0 = getelementptr i32, ptr %p, i64 %offset
66  %x0 = load i32, ptr %q0
67  %y0 = add i32 %x0, 1
68  store i32 %y0, ptr %q0
69
70  %nexti = add i64 %i, 1
71  %done = icmp eq i64 %nexti, 1024
72  br i1 %done, label %exit, label %loop
73exit:
74  ret void
75}
76
77define void @single_constant_stride_int_iv(ptr %p) {
78; CHECK-LABEL: @single_constant_stride_int_iv(
79; CHECK-NEXT:  entry:
80; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
81; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
82; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
83; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
84; CHECK:       vector.ph:
85; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
86; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
87; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
88; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
89; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
90; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
91; CHECK-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 64
92; CHECK-NEXT:    [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
93; CHECK-NEXT:    [[TMP8:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 64)
94; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP8]]
95; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 64, [[TMP5]]
96; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP11]], i64 0
97; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
98; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
99; CHECK:       vector.body:
100; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
101; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
102; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[VEC_IND]]
103; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison)
104; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
105; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP13]], <vscale x 4 x ptr> [[TMP12]], i32 4, <vscale x 4 x i1> splat (i1 true))
106; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
107; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
108; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
109; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
110; CHECK:       middle.block:
111; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
112; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
113; CHECK:       scalar.ph:
114; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
115; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
116; CHECK-NEXT:    br label [[LOOP:%.*]]
117; CHECK:       loop:
118; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
119; CHECK-NEXT:    [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
120; CHECK-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
121; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
122; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
123; CHECK-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
124; CHECK-NEXT:    [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], 64
125; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
126; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
127; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
128; CHECK:       exit:
129; CHECK-NEXT:    ret void
130;
131entry:
132  br label %loop
133loop:
134  %i = phi i64 [0, %entry], [%nexti, %loop]
135  %offset = phi i64 [0, %entry], [%offset.next, %loop]
136
137  %q0 = getelementptr i32, ptr %p, i64 %offset
138  %x0 = load i32, ptr %q0
139  %y0 = add i32 %x0, 1
140  store i32 %y0, ptr %q0
141
142  %offset.next = add nsw nuw i64 %offset, 64
143  %nexti = add i64 %i, 1
144  %done = icmp eq i64 %nexti, 1024
145  br i1 %done, label %exit, label %loop
146exit:
147  ret void
148}
149
150
151define void @single_constant_stride_ptr_iv(ptr %p) {
152; CHECK-LABEL: @single_constant_stride_ptr_iv(
153; CHECK-NEXT:  entry:
154; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
155; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
156; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 1024, [[TMP1]]
157; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
158; CHECK:       vector.ph:
159; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
160; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
161; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
162; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
163; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]]
164; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[TMP5]]
165; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
166; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
167; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[N_VEC]], 8
168; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP18]]
169; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
170; CHECK:       vector.body:
171; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
172; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
173; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
174; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
175; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 1
176; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 8, [[TMP11]]
177; CHECK-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP10]], 0
178; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP13]], i64 0
179; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
180; CHECK-NEXT:    [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
181; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP14]]
182; CHECK-NEXT:    [[TMP16:%.*]] = mul <vscale x 4 x i64> [[TMP15]], splat (i64 8)
183; CHECK-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP16]]
184; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <vscale x 4 x ptr> [[VECTOR_GEP]], i32 0
185; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP17]], align 4
186; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
187; CHECK-NEXT:    [[TMP19:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
188; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 4 x i32> [[TMP19]], splat (i32 1)
189; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true))
190; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
191; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP12]]
192; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
193; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
194; CHECK:       middle.block:
195; CHECK-NEXT:    br label [[SCALAR_PH]]
196; CHECK:       scalar.ph:
197; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
198; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[ENTRY]] ]
199; CHECK-NEXT:    br label [[LOOP:%.*]]
200; CHECK:       loop:
201; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
202; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
203; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[PTR]], align 4
204; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
205; CHECK-NEXT:    store i32 [[Y0]], ptr [[PTR]], align 4
206; CHECK-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
207; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
208; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
209; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
210; CHECK:       exit:
211; CHECK-NEXT:    ret void
212;
213entry:
214  br label %loop
215loop:
216  %i = phi i64 [0, %entry], [%nexti, %loop]
217  %ptr = phi ptr [%p, %entry], [%ptr.next, %loop]
218
219  %x0 = load i32, ptr %ptr
220  %y0 = add i32 %x0, 1
221  store i32 %y0, ptr %ptr
222
223  %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 8
224  %nexti = add i64 %i, 1
225  %done = icmp eq i64 %nexti, 1024
226  br i1 %done, label %exit, label %loop
227exit:
228  ret void
229}
230
231
232define void @single_stride_int_scaled(ptr %p, i64 %stride) {
233; NOSTRIDED-LABEL: @single_stride_int_scaled(
234; NOSTRIDED-NEXT:  entry:
235; NOSTRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
236; NOSTRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
237; NOSTRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
238; NOSTRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
239; NOSTRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
240; NOSTRIDED:       vector.scevcheck:
241; NOSTRIDED-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
242; NOSTRIDED-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
243; NOSTRIDED:       vector.ph:
244; NOSTRIDED-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
245; NOSTRIDED-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
246; NOSTRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
247; NOSTRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
248; NOSTRIDED-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
249; NOSTRIDED-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
250; NOSTRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
251; NOSTRIDED:       vector.body:
252; NOSTRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
253; NOSTRIDED-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
254; NOSTRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
255; NOSTRIDED-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
256; NOSTRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
257; NOSTRIDED-NEXT:    [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
258; NOSTRIDED-NEXT:    store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4
259; NOSTRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
260; NOSTRIDED-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
261; NOSTRIDED-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
262; NOSTRIDED:       middle.block:
263; NOSTRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
264; NOSTRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
265; NOSTRIDED:       scalar.ph:
266; NOSTRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
267; NOSTRIDED-NEXT:    br label [[LOOP:%.*]]
268; NOSTRIDED:       loop:
269; NOSTRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
270; NOSTRIDED-NEXT:    [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
271; NOSTRIDED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
272; NOSTRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
273; NOSTRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
274; NOSTRIDED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
275; NOSTRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
276; NOSTRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
277; NOSTRIDED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
278; NOSTRIDED:       exit:
279; NOSTRIDED-NEXT:    ret void
280;
281; STRIDED-LABEL: @single_stride_int_scaled(
282; STRIDED-NEXT:  entry:
283; STRIDED-NEXT:    br label [[LOOP:%.*]]
284; STRIDED:       loop:
285; STRIDED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
286; STRIDED-NEXT:    [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE:%.*]]
287; STRIDED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET]]
288; STRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
289; STRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
290; STRIDED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
291; STRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
292; STRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
293; STRIDED-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
294; STRIDED:       exit:
295; STRIDED-NEXT:    ret void
296;
297entry:
298  br label %loop
299loop:
300  %i = phi i64 [0, %entry], [%nexti, %loop]
301
302  %offset = mul nsw nuw i64 %i, %stride
303  %q0 = getelementptr i32, ptr %p, i64 %offset
304  %x0 = load i32, ptr %q0
305  %y0 = add i32 %x0, 1
306  store i32 %y0, ptr %q0
307
308  %nexti = add i64 %i, 1
309  %done = icmp eq i64 %nexti, 1024
310  br i1 %done, label %exit, label %loop
311exit:
312  ret void
313}
314
315define void @single_stride_int_iv(ptr %p, i64 %stride) {
316; NOSTRIDED-LABEL: @single_stride_int_iv(
317; NOSTRIDED-NEXT:  entry:
318; NOSTRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
319; NOSTRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
320; NOSTRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
321; NOSTRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
322; NOSTRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
323; NOSTRIDED:       vector.scevcheck:
324; NOSTRIDED-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
325; NOSTRIDED-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
326; NOSTRIDED:       vector.ph:
327; NOSTRIDED-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
328; NOSTRIDED-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
329; NOSTRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
330; NOSTRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
331; NOSTRIDED-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
332; NOSTRIDED-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
333; NOSTRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
334; NOSTRIDED:       vector.body:
335; NOSTRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
336; NOSTRIDED-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
337; NOSTRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
338; NOSTRIDED-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
339; NOSTRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
340; NOSTRIDED-NEXT:    [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
341; NOSTRIDED-NEXT:    store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4
342; NOSTRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
343; NOSTRIDED-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
344; NOSTRIDED-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
345; NOSTRIDED:       middle.block:
346; NOSTRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
347; NOSTRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
348; NOSTRIDED:       scalar.ph:
349; NOSTRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
350; NOSTRIDED-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
351; NOSTRIDED-NEXT:    br label [[LOOP:%.*]]
352; NOSTRIDED:       loop:
353; NOSTRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
354; NOSTRIDED-NEXT:    [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
355; NOSTRIDED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
356; NOSTRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
357; NOSTRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
358; NOSTRIDED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
359; NOSTRIDED-NEXT:    [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], [[STRIDE]]
360; NOSTRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
361; NOSTRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
362; NOSTRIDED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
363; NOSTRIDED:       exit:
364; NOSTRIDED-NEXT:    ret void
365;
366; STRIDED-LABEL: @single_stride_int_iv(
367; STRIDED-NEXT:  entry:
368; STRIDED-NEXT:    br label [[LOOP:%.*]]
369; STRIDED:       loop:
370; STRIDED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
371; STRIDED-NEXT:    [[OFFSET:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
372; STRIDED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET]]
373; STRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
374; STRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
375; STRIDED-NEXT:    store i32 [[Y0]], ptr [[Q0]], align 4
376; STRIDED-NEXT:    [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], [[STRIDE:%.*]]
377; STRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
378; STRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
379; STRIDED-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
380; STRIDED:       exit:
381; STRIDED-NEXT:    ret void
382;
383entry:
384  br label %loop
385loop:
386  %i = phi i64 [0, %entry], [%nexti, %loop]
387  %offset = phi i64 [0, %entry], [%offset.next, %loop]
388
389  %q0 = getelementptr i32, ptr %p, i64 %offset
390  %x0 = load i32, ptr %q0
391  %y0 = add i32 %x0, 1
392  store i32 %y0, ptr %q0
393
394  %offset.next = add nsw nuw i64 %offset, %stride
395  %nexti = add i64 %i, 1
396  %done = icmp eq i64 %nexti, 1024
397  br i1 %done, label %exit, label %loop
398exit:
399  ret void
400}
401
402
403define void @single_stride_ptr_iv(ptr %p, i64 %stride) {
404; CHECK-LABEL: @single_stride_ptr_iv(
405; CHECK-NEXT:  entry:
406; CHECK-NEXT:    br label [[LOOP:%.*]]
407; CHECK:       loop:
408; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
409; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
410; CHECK-NEXT:    [[X0:%.*]] = load i32, ptr [[PTR]], align 4
411; CHECK-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
412; CHECK-NEXT:    store i32 [[Y0]], ptr [[PTR]], align 4
413; CHECK-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
414; CHECK-NEXT:    [[NEXTI]] = add i64 [[I]], 1
415; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
416; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
417; CHECK:       exit:
418; CHECK-NEXT:    ret void
419;
420entry:
421  br label %loop
422loop:
423  %i = phi i64 [0, %entry], [%nexti, %loop]
424  %ptr = phi ptr [%p, %entry], [%ptr.next, %loop]
425
426  %x0 = load i32, ptr %ptr
427  %y0 = add i32 %x0, 1
428  store i32 %y0, ptr %ptr
429
430  %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 %stride
431  %nexti = add i64 %i, 1
432  %done = icmp eq i64 %nexti, 1024
433  br i1 %done, label %exit, label %loop
434exit:
435  ret void
436}
437
438define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) {
439; NOSTRIDED-LABEL: @double_stride_int_scaled(
440; NOSTRIDED-NEXT:  entry:
441; NOSTRIDED-NEXT:    [[P3:%.*]] = ptrtoint ptr [[P:%.*]] to i64
442; NOSTRIDED-NEXT:    [[P21:%.*]] = ptrtoint ptr [[P2:%.*]] to i64
443; NOSTRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
444; NOSTRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
445; NOSTRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]])
446; NOSTRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
447; NOSTRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
448; NOSTRIDED:       vector.scevcheck:
449; NOSTRIDED-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
450; NOSTRIDED-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_MEMCHECK:%.*]]
451; NOSTRIDED:       vector.memcheck:
452; NOSTRIDED-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
453; NOSTRIDED-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
454; NOSTRIDED-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
455; NOSTRIDED-NEXT:    [[TMP6:%.*]] = sub i64 [[P21]], [[P3]]
456; NOSTRIDED-NEXT:    [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]]
457; NOSTRIDED-NEXT:    br i1 [[DIFF_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
458; NOSTRIDED:       vector.ph:
459; NOSTRIDED-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
460; NOSTRIDED-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
461; NOSTRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP8]]
462; NOSTRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
463; NOSTRIDED-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
464; NOSTRIDED-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 4
465; NOSTRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
466; NOSTRIDED:       vector.body:
467; NOSTRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
468; NOSTRIDED-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 0
469; NOSTRIDED-NEXT:    [[TMP12:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP11]]
470; NOSTRIDED-NEXT:    [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
471; NOSTRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP13]], align 4
472; NOSTRIDED-NEXT:    [[TMP14:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
473; NOSTRIDED-NEXT:    [[TMP15:%.*]] = getelementptr i32, ptr [[P2]], i64 [[TMP11]]
474; NOSTRIDED-NEXT:    [[TMP16:%.*]] = getelementptr i32, ptr [[TMP15]], i32 0
475; NOSTRIDED-NEXT:    store <vscale x 4 x i32> [[TMP14]], ptr [[TMP16]], align 4
476; NOSTRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
477; NOSTRIDED-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
478; NOSTRIDED-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
479; NOSTRIDED:       middle.block:
480; NOSTRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
481; NOSTRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
482; NOSTRIDED:       scalar.ph:
483; NOSTRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
484; NOSTRIDED-NEXT:    br label [[LOOP:%.*]]
485; NOSTRIDED:       loop:
486; NOSTRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
487; NOSTRIDED-NEXT:    [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
488; NOSTRIDED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
489; NOSTRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
490; NOSTRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
491; NOSTRIDED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P2]], i64 [[OFFSET]]
492; NOSTRIDED-NEXT:    store i32 [[Y0]], ptr [[Q1]], align 4
493; NOSTRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
494; NOSTRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
495; NOSTRIDED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
496; NOSTRIDED:       exit:
497; NOSTRIDED-NEXT:    ret void
498;
499; STRIDED-LABEL: @double_stride_int_scaled(
500; STRIDED-NEXT:  entry:
501; STRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
502; STRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
503; STRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 24, i64 [[TMP1]])
504; STRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
505; STRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
506; STRIDED:       vector.memcheck:
507; STRIDED-NEXT:    [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 4092
508; STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]]
509; STRIDED-NEXT:    [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]]
510; STRIDED-NEXT:    [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]]
511; STRIDED-NEXT:    [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]]
512; STRIDED-NEXT:    [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]]
513; STRIDED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
514; STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
515; STRIDED-NEXT:    [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]]
516; STRIDED-NEXT:    [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]]
517; STRIDED-NEXT:    [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]]
518; STRIDED-NEXT:    [[UMAX4:%.*]] = select i1 [[TMP7]], ptr [[P]], ptr [[SCEVGEP2]]
519; STRIDED-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[UMAX4]], i64 4
520; STRIDED-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[UMIN]], [[SCEVGEP5]]
521; STRIDED-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[UMIN3]], [[SCEVGEP1]]
522; STRIDED-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
523; STRIDED-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
524; STRIDED:       vector.ph:
525; STRIDED-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
526; STRIDED-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 4
527; STRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]]
528; STRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
529; STRIDED-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
530; STRIDED-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 4
531; STRIDED-NEXT:    [[TMP12:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
532; STRIDED-NEXT:    [[TMP14:%.*]] = mul <vscale x 4 x i64> [[TMP12]], splat (i64 1)
533; STRIDED-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP14]]
534; STRIDED-NEXT:    [[TMP17:%.*]] = mul i64 1, [[TMP11]]
535; STRIDED-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
536; STRIDED-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
537; STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
538; STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
539; STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
540; STRIDED:       vector.body:
541; STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
542; STRIDED-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
543; STRIDED-NEXT:    [[TMP18:%.*]] = mul nuw nsw <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
544; STRIDED-NEXT:    [[TMP19:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP18]]
545; STRIDED-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP19]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META8:![0-9]+]]
546; STRIDED-NEXT:    [[TMP20:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
547; STRIDED-NEXT:    [[TMP21:%.*]] = getelementptr i32, ptr [[P2]], <vscale x 4 x i64> [[TMP18]]
548; STRIDED-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP20]], <vscale x 4 x ptr> [[TMP21]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META8]]
549; STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
550; STRIDED-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
551; STRIDED-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
552; STRIDED-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
553; STRIDED:       middle.block:
554; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
555; STRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
556; STRIDED:       scalar.ph:
557; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
558; STRIDED-NEXT:    br label [[LOOP:%.*]]
559; STRIDED:       loop:
560; STRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
561; STRIDED-NEXT:    [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]]
562; STRIDED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
563; STRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
564; STRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
565; STRIDED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P2]], i64 [[OFFSET]]
566; STRIDED-NEXT:    store i32 [[Y0]], ptr [[Q1]], align 4
567; STRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
568; STRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
569; STRIDED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP14:![0-9]+]]
570; STRIDED:       exit:
571; STRIDED-NEXT:    ret void
572;
573entry:
574  br label %loop
575loop:
576  %i = phi i64 [0, %entry], [%nexti, %loop]
577
578  %offset = mul nsw nuw i64 %i, %stride
579  %q0 = getelementptr i32, ptr %p, i64 %offset
580  %x0 = load i32, ptr %q0
581  %y0 = add i32 %x0, 1
582  %q1 = getelementptr i32, ptr %p2, i64 %offset
583  store i32 %y0, ptr %q1
584
585  %nexti = add i64 %i, 1
586  %done = icmp eq i64 %nexti, 1024
587  br i1 %done, label %exit, label %loop
588exit:
589  ret void
590}
591
592define void @double_stride_int_iv(ptr %p, ptr %p2, i64 %stride) {
593; NOSTRIDED-LABEL: @double_stride_int_iv(
594; NOSTRIDED-NEXT:  entry:
595; NOSTRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
596; NOSTRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
597; NOSTRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 8, i64 [[TMP1]])
598; NOSTRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
599; NOSTRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
600; NOSTRIDED:       vector.scevcheck:
601; NOSTRIDED-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i64 [[STRIDE:%.*]], 1
602; NOSTRIDED-NEXT:    br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
603; NOSTRIDED:       vector.ph:
604; NOSTRIDED-NEXT:    [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
605; NOSTRIDED-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 4
606; NOSTRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP4]]
607; NOSTRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
608; NOSTRIDED-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
609; NOSTRIDED-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
610; NOSTRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
611; NOSTRIDED:       vector.body:
612; NOSTRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
613; NOSTRIDED-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
614; NOSTRIDED-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP7]]
615; NOSTRIDED-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[TMP8]], i32 0
616; NOSTRIDED-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP9]], align 4
617; NOSTRIDED-NEXT:    [[TMP10:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 1)
618; NOSTRIDED-NEXT:    store <vscale x 4 x i32> [[TMP10]], ptr [[TMP9]], align 4
619; NOSTRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
620; NOSTRIDED-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
621; NOSTRIDED-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
622; NOSTRIDED:       middle.block:
623; NOSTRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
624; NOSTRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
625; NOSTRIDED:       scalar.ph:
626; NOSTRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
627; NOSTRIDED-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
628; NOSTRIDED-NEXT:    br label [[LOOP:%.*]]
629; NOSTRIDED:       loop:
630; NOSTRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
631; NOSTRIDED-NEXT:    [[OFFSET:%.*]] = phi i64 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
632; NOSTRIDED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
633; NOSTRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
634; NOSTRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
635; NOSTRIDED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
636; NOSTRIDED-NEXT:    store i32 [[Y0]], ptr [[Q1]], align 4
637; NOSTRIDED-NEXT:    [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], [[STRIDE]]
638; NOSTRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
639; NOSTRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
640; NOSTRIDED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]]
641; NOSTRIDED:       exit:
642; NOSTRIDED-NEXT:    ret void
643;
644; STRIDED-LABEL: @double_stride_int_iv(
645; STRIDED-NEXT:  entry:
646; STRIDED-NEXT:    br label [[LOOP:%.*]]
647; STRIDED:       loop:
648; STRIDED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
649; STRIDED-NEXT:    [[OFFSET:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[OFFSET_NEXT:%.*]], [[LOOP]] ]
650; STRIDED-NEXT:    [[Q0:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET]]
651; STRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[Q0]], align 4
652; STRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
653; STRIDED-NEXT:    [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]]
654; STRIDED-NEXT:    store i32 [[Y0]], ptr [[Q1]], align 4
655; STRIDED-NEXT:    [[OFFSET_NEXT]] = add nuw nsw i64 [[OFFSET]], [[STRIDE:%.*]]
656; STRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
657; STRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
658; STRIDED-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
659; STRIDED:       exit:
660; STRIDED-NEXT:    ret void
661;
662entry:
663  br label %loop
664loop:
665  %i = phi i64 [0, %entry], [%nexti, %loop]
666  %offset = phi i64 [0, %entry], [%offset.next, %loop]
667
668  %q0 = getelementptr i32, ptr %p, i64 %offset
669  %x0 = load i32, ptr %q0
670  %y0 = add i32 %x0, 1
671  %q1 = getelementptr i32, ptr %p, i64 %offset
672  store i32 %y0, ptr %q1
673
674  %offset.next = add nsw nuw i64 %offset, %stride
675  %nexti = add i64 %i, 1
676  %done = icmp eq i64 %nexti, 1024
677  br i1 %done, label %exit, label %loop
678exit:
679  ret void
680}
681
682
683define void @double_stride_ptr_iv(ptr %p, ptr %p2, i64 %stride) {
684; NOSTRIDED-LABEL: @double_stride_ptr_iv(
685; NOSTRIDED-NEXT:  entry:
686; NOSTRIDED-NEXT:    br label [[LOOP:%.*]]
687; NOSTRIDED:       loop:
688; NOSTRIDED-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
689; NOSTRIDED-NEXT:    [[PTR:%.*]] = phi ptr [ [[P:%.*]], [[ENTRY]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
690; NOSTRIDED-NEXT:    [[PTR2:%.*]] = phi ptr [ [[P2:%.*]], [[ENTRY]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
691; NOSTRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[PTR]], align 4
692; NOSTRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
693; NOSTRIDED-NEXT:    store i32 [[Y0]], ptr [[PTR2]], align 4
694; NOSTRIDED-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE:%.*]]
695; NOSTRIDED-NEXT:    [[PTR2_NEXT]] = getelementptr inbounds i8, ptr [[PTR2]], i64 [[STRIDE]]
696; NOSTRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
697; NOSTRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
698; NOSTRIDED-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]]
699; NOSTRIDED:       exit:
700; NOSTRIDED-NEXT:    ret void
701;
702; STRIDED-LABEL: @double_stride_ptr_iv(
703; STRIDED-NEXT:  entry:
704; STRIDED-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
705; STRIDED-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
706; STRIDED-NEXT:    [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 32, i64 [[TMP1]])
707; STRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP2]]
708; STRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
709; STRIDED:       vector.memcheck:
710; STRIDED-NEXT:    [[TMP3:%.*]] = mul i64 [[STRIDE:%.*]], 1023
711; STRIDED-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P2:%.*]], i64 [[TMP3]]
712; STRIDED-NEXT:    [[TMP4:%.*]] = icmp ult ptr [[P2]], [[SCEVGEP]]
713; STRIDED-NEXT:    [[UMIN:%.*]] = select i1 [[TMP4]], ptr [[P2]], ptr [[SCEVGEP]]
714; STRIDED-NEXT:    [[TMP5:%.*]] = icmp ugt ptr [[P2]], [[SCEVGEP]]
715; STRIDED-NEXT:    [[UMAX:%.*]] = select i1 [[TMP5]], ptr [[P2]], ptr [[SCEVGEP]]
716; STRIDED-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[UMAX]], i64 4
717; STRIDED-NEXT:    [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[TMP3]]
718; STRIDED-NEXT:    [[TMP6:%.*]] = icmp ult ptr [[P]], [[SCEVGEP2]]
719; STRIDED-NEXT:    [[UMIN3:%.*]] = select i1 [[TMP6]], ptr [[P]], ptr [[SCEVGEP2]]
720; STRIDED-NEXT:    [[TMP7:%.*]] = icmp ugt ptr [[P]], [[SCEVGEP2]]
721; STRIDED-NEXT:    [[UMAX4:%.*]] = select i1 [[TMP7]], ptr [[P]], ptr [[SCEVGEP2]]
722; STRIDED-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[UMAX4]], i64 4
723; STRIDED-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[UMIN]], [[SCEVGEP5]]
724; STRIDED-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[UMIN3]], [[SCEVGEP1]]
725; STRIDED-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
726; STRIDED-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
727; STRIDED:       vector.ph:
728; STRIDED-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
729; STRIDED-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 4
730; STRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP9]]
731; STRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
732; STRIDED-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vscale.i64()
733; STRIDED-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 4
734; STRIDED-NEXT:    [[TMP10:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
735; STRIDED-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[P]], i64 [[TMP10]]
736; STRIDED-NEXT:    [[TMP11:%.*]] = mul i64 [[N_VEC]], [[STRIDE]]
737; STRIDED-NEXT:    [[IND_END7:%.*]] = getelementptr i8, ptr [[P2]], i64 [[TMP11]]
738; STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
739; STRIDED:       vector.body:
740; STRIDED-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[P]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
741; STRIDED-NEXT:    [[POINTER_PHI11:%.*]] = phi ptr [ [[P2]], [[VECTOR_PH]] ], [ [[PTR_IND12:%.*]], [[VECTOR_BODY]] ]
742; STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
743; STRIDED-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
744; STRIDED-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 4
745; STRIDED-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 1
746; STRIDED-NEXT:    [[TMP17:%.*]] = mul i64 [[STRIDE]], [[TMP16]]
747; STRIDED-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP15]], 0
748; STRIDED-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP18]], i64 0
749; STRIDED-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
750; STRIDED-NEXT:    [[TMP19:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
751; STRIDED-NEXT:    [[TMP20:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT]], [[TMP19]]
752; STRIDED-NEXT:    [[DOTSPLATINSERT9:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[STRIDE]], i64 0
753; STRIDED-NEXT:    [[DOTSPLAT10:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT9]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
754; STRIDED-NEXT:    [[TMP21:%.*]] = mul <vscale x 4 x i64> [[TMP20]], [[DOTSPLAT10]]
755; STRIDED-NEXT:    [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <vscale x 4 x i64> [[TMP21]]
756; STRIDED-NEXT:    [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
757; STRIDED-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 4
758; STRIDED-NEXT:    [[TMP24:%.*]] = mul i64 [[TMP23]], 1
759; STRIDED-NEXT:    [[TMP25:%.*]] = mul i64 [[STRIDE]], [[TMP24]]
760; STRIDED-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP23]], 0
761; STRIDED-NEXT:    [[DOTSPLATINSERT13:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP26]], i64 0
762; STRIDED-NEXT:    [[DOTSPLAT14:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT13]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
763; STRIDED-NEXT:    [[TMP27:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
764; STRIDED-NEXT:    [[TMP28:%.*]] = add <vscale x 4 x i64> [[DOTSPLAT14]], [[TMP27]]
765; STRIDED-NEXT:    [[TMP29:%.*]] = mul <vscale x 4 x i64> [[TMP28]], [[DOTSPLAT10]]
766; STRIDED-NEXT:    [[VECTOR_GEP17:%.*]] = getelementptr i8, ptr [[POINTER_PHI11]], <vscale x 4 x i64> [[TMP29]]
767; STRIDED-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[VECTOR_GEP]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> poison), !alias.scope [[META15:![0-9]+]]
768; STRIDED-NEXT:    [[TMP30:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], splat (i32 1)
769; STRIDED-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP30]], <vscale x 4 x ptr> [[VECTOR_GEP17]], i32 4, <vscale x 4 x i1> splat (i1 true)), !alias.scope [[META18:![0-9]+]], !noalias [[META15]]
770; STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
771; STRIDED-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP17]]
772; STRIDED-NEXT:    [[PTR_IND12]] = getelementptr i8, ptr [[POINTER_PHI11]], i64 [[TMP25]]
773; STRIDED-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
774; STRIDED-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
775; STRIDED:       middle.block:
776; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
777; STRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
778; STRIDED:       scalar.ph:
779; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY:%.*]] ]
780; STRIDED-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[P]], [[VECTOR_MEMCHECK]] ], [ [[P]], [[ENTRY]] ]
781; STRIDED-NEXT:    [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END7]], [[MIDDLE_BLOCK]] ], [ [[P2]], [[VECTOR_MEMCHECK]] ], [ [[P2]], [[ENTRY]] ]
782; STRIDED-NEXT:    br label [[LOOP:%.*]]
783; STRIDED:       loop:
784; STRIDED-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
785; STRIDED-NEXT:    [[PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[PTR_NEXT:%.*]], [[LOOP]] ]
786; STRIDED-NEXT:    [[PTR2:%.*]] = phi ptr [ [[BC_RESUME_VAL8]], [[SCALAR_PH]] ], [ [[PTR2_NEXT:%.*]], [[LOOP]] ]
787; STRIDED-NEXT:    [[X0:%.*]] = load i32, ptr [[PTR]], align 4
788; STRIDED-NEXT:    [[Y0:%.*]] = add i32 [[X0]], 1
789; STRIDED-NEXT:    store i32 [[Y0]], ptr [[PTR2]], align 4
790; STRIDED-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[STRIDE]]
791; STRIDED-NEXT:    [[PTR2_NEXT]] = getelementptr inbounds i8, ptr [[PTR2]], i64 [[STRIDE]]
792; STRIDED-NEXT:    [[NEXTI]] = add i64 [[I]], 1
793; STRIDED-NEXT:    [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024
794; STRIDED-NEXT:    br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
795; STRIDED:       exit:
796; STRIDED-NEXT:    ret void
797;
798entry:
799  br label %loop
800loop:
801  %i = phi i64 [0, %entry], [%nexti, %loop]
802  %ptr = phi ptr [%p, %entry], [%ptr.next, %loop]
803  %ptr2 = phi ptr [%p2, %entry], [%ptr2.next, %loop]
804
805  %x0 = load i32, ptr %ptr
806  %y0 = add i32 %x0, 1
807  store i32 %y0, ptr %ptr2
808
809  %ptr.next = getelementptr inbounds i8, ptr %ptr, i64 %stride
810  %ptr2.next = getelementptr inbounds i8, ptr %ptr2, i64 %stride
811  %nexti = add i64 %i, 1
812  %done = icmp eq i64 %nexti, 1024
813  br i1 %done, label %exit, label %loop
814exit:
815  ret void
816}
817