xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll (revision 7f3428d3ed71d87a2088b77b6cab9f3d86544234)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; REQUIRES: asserts
3; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-minimum-VF=0 -prefer-predicate-over-epilogue=scalar-epilogue \
4; RUN:   -debug-only=loop-vectorize -force-target-instruction-cost=1 -S 2>%t | FileCheck %s --check-prefix=CHECK
5; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
6; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=2 -epilogue-vectorization-force-VF=8 \
7; RUN:   -debug-only=loop-vectorize -S 2>%t | FileCheck %s --check-prefix=CHECK-VF8
8; RUN: cat %t | FileCheck %s --check-prefix=DEBUG-FORCED
9
; Target triple applied to every function in this test file.
10target triple = "aarch64-linux-gnu"
11
12; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16'
13; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
14; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1
15
16; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_16'
17; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
18; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass)
19; DEBUG-FORCED: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
20
; Stores the i8 constant 1 to A[iv] for iv = 0 .. 1023 (constant trip count of
; 1024). Per the debug-output expectations above, both opt invocations pick
; VF = vscale x 16 with UF = 2 for the main vector loop; the default invocation
; expects a scalable epilogue (VF = vscale x 8) and the forced invocation a
; fixed-width one (VF = 8).
; The assertions inside the function are autogenerated (see the note at the
; top of the file); regenerate them with utils/update_test_checks.py rather
; than editing them by hand.
21define void @main_vf_vscale_x_16(ptr %A) #0 {
22; CHECK-LABEL: @main_vf_vscale_x_16(
23; CHECK-NEXT:  iter.check:
24; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
25; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
26; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
27; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
28; CHECK:       vector.main.loop.iter.check:
29; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
30; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 32
31; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 1024, [[TMP3]]
32; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
33; CHECK:       vector.ph:
34; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
35; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 32
36; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP5]]
37; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
38; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
39; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 32
40; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
41; CHECK:       vector.body:
42; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
43; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
44; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP8]]
45; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 0
46; CHECK-NEXT:    [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
47; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP17]], 16
48; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 [[TMP18]]
49; CHECK-NEXT:    store <vscale x 16 x i8> splat (i8 1), ptr [[TMP16]], align 1
50; CHECK-NEXT:    store <vscale x 16 x i8> splat (i8 1), ptr [[TMP19]], align 1
51; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
52; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
53; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
54; CHECK:       middle.block:
55; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
56; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
57; CHECK:       vec.epilog.iter.check:
58; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
59; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
60; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], 8
61; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP22]]
62; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
63; CHECK:       vec.epilog.ph:
64; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
65; CHECK-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
66; CHECK-NEXT:    [[TMP24:%.*]] = mul i64 [[TMP23]], 8
67; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 1024, [[TMP24]]
68; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 1024, [[N_MOD_VF2]]
69; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
70; CHECK-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP25]], 8
71; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
72; CHECK:       vec.epilog.vector.body:
73; CHECK-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
74; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX5]], 0
75; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP27]]
76; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[TMP28]], i32 0
77; CHECK-NEXT:    store <vscale x 8 x i8> splat (i8 1), ptr [[TMP29]], align 1
78; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], [[TMP26]]
79; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]]
80; CHECK-NEXT:    br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
81; CHECK:       vec.epilog.middle.block:
82; CHECK-NEXT:    [[CMP_N4:%.*]] = icmp eq i64 1024, [[N_VEC3]]
83; CHECK-NEXT:    br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
84; CHECK:       vec.epilog.scalar.ph:
85; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
86; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
87; CHECK:       for.body:
88; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
89; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
90; CHECK-NEXT:    store i8 1, ptr [[ARRAYIDX]], align 1
91; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
92; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
93; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]]
94; CHECK:       exit:
95; CHECK-NEXT:    ret void
96;
97; CHECK-VF8-LABEL: @main_vf_vscale_x_16(
98; CHECK-VF8-NEXT:  iter.check:
99; CHECK-VF8-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
100; CHECK-VF8:       vector.main.loop.iter.check:
101; CHECK-VF8-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
102; CHECK-VF8-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 32
103; CHECK-VF8-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
104; CHECK-VF8-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
105; CHECK-VF8:       vector.ph:
106; CHECK-VF8-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
107; CHECK-VF8-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 32
108; CHECK-VF8-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
109; CHECK-VF8-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
110; CHECK-VF8-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
111; CHECK-VF8-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 32
112; CHECK-VF8-NEXT:    br label [[VECTOR_BODY:%.*]]
113; CHECK-VF8:       vector.body:
114; CHECK-VF8-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
115; CHECK-VF8-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
116; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP6]]
117; CHECK-VF8-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 0
118; CHECK-VF8-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
119; CHECK-VF8-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 16
120; CHECK-VF8-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 [[TMP16]]
121; CHECK-VF8-NEXT:    store <vscale x 16 x i8> splat (i8 1), ptr [[TMP14]], align 1
122; CHECK-VF8-NEXT:    store <vscale x 16 x i8> splat (i8 1), ptr [[TMP17]], align 1
123; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
124; CHECK-VF8-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
125; CHECK-VF8-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
126; CHECK-VF8:       middle.block:
127; CHECK-VF8-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
128; CHECK-VF8-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
129; CHECK-VF8:       vec.epilog.iter.check:
130; CHECK-VF8-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
131; CHECK-VF8-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
132; CHECK-VF8-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
133; CHECK-VF8:       vec.epilog.ph:
134; CHECK-VF8-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
135; CHECK-VF8-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
136; CHECK-VF8:       vec.epilog.vector.body:
137; CHECK-VF8-NEXT:    [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
138; CHECK-VF8-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
139; CHECK-VF8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP19]]
140; CHECK-VF8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i32 0
141; CHECK-VF8-NEXT:    store <8 x i8> splat (i8 1), ptr [[TMP21]], align 1
142; CHECK-VF8-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
143; CHECK-VF8-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
144; CHECK-VF8-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
145; CHECK-VF8:       vec.epilog.middle.block:
146; CHECK-VF8-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
147; CHECK-VF8:       vec.epilog.scalar.ph:
148; CHECK-VF8-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
149; CHECK-VF8-NEXT:    br label [[FOR_BODY:%.*]]
150; CHECK-VF8:       for.body:
151; CHECK-VF8-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
152; CHECK-VF8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
153; CHECK-VF8-NEXT:    store i8 1, ptr [[ARRAYIDX]], align 1
154; CHECK-VF8-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
155; CHECK-VF8-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
156; CHECK-VF8-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP4:![0-9]+]]
157; CHECK-VF8:       exit:
158; CHECK-VF8-NEXT:    ret void
159;
; Scalar input loop handed to the vectorizer.
160entry:
161  br label %for.body
162
163for.body:
164  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
165  %arrayidx = getelementptr inbounds i8, ptr %A, i64 %iv
166  store i8 1, ptr %arrayidx, align 1
167  %iv.next = add nuw nsw i64 %iv, 1
; The exit test compares the incremented IV against 1024, so the body runs
; exactly 1024 times.
168  %exitcond = icmp ne i64 %iv.next, 1024
169  br i1 %exitcond, label %for.body, label %exit
170
171exit:
172  ret void
173}
174
175
176; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_2'
177; DEBUG: Create Skeleton for epilogue vectorized loop (first pass)
178; DEBUG: Main Loop VF:vscale x 2, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
179
180; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_2'
181; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced.
182; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass)
183; DEBUG-FORCED: Main Loop VF:vscale x 2, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1
184
185; When the vector.body uses VF=vscale x 1 (or VF=vscale x 2 because
186; that's the minimum supported VF by SVE), we could still use a wide
187; fixed-width VF=8 for the epilogue if the vectors are known to be
188; sufficiently wide. This information can be deduced from vscale_range or
189; VScaleForTuning (set by mcpu/mtune).
; Stores the i64 constant 1 (with align 1) to A[iv] for iv = 0 .. 1023
; (constant trip count of 1024). The function fixes vscale to exactly 8 through
; vscale_range(8, 8). Per the debug-output expectations above, both opt
; invocations pick VF = vscale x 2 with UF = 2 for the main vector loop and a
; fixed-width VF = 8 for the epilogue loop.
; The assertions inside the function are autogenerated (see the note at the
; top of the file); regenerate them with utils/update_test_checks.py rather
; than editing them by hand.
190define void @main_vf_vscale_x_2(ptr %A) #0 vscale_range(8, 8) {
191; CHECK-LABEL: @main_vf_vscale_x_2(
192; CHECK-NEXT:  iter.check:
193; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
194; CHECK:       vector.main.loop.iter.check:
195; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
196; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
197; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
198; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
199; CHECK:       vector.ph:
200; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
201; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
202; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
203; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
204; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
205; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
206; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
207; CHECK:       vector.body:
208; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
209; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
210; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
211; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
212; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
213; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
214; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
215; CHECK-NEXT:    store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1
216; CHECK-NEXT:    store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1
217; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
218; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
219; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
220; CHECK:       middle.block:
221; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
222; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
223; CHECK:       vec.epilog.iter.check:
224; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
225; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
226; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
227; CHECK:       vec.epilog.ph:
228; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
229; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
230; CHECK:       vec.epilog.vector.body:
231; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
232; CHECK-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
233; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]]
234; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
235; CHECK-NEXT:    store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1
236; CHECK-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
237; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
238; CHECK-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
239; CHECK:       vec.epilog.middle.block:
240; CHECK-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
241; CHECK:       vec.epilog.scalar.ph:
242; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
243; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
244; CHECK:       for.body:
245; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
246; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
247; CHECK-NEXT:    store i64 1, ptr [[ARRAYIDX]], align 1
248; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
249; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
250; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
251; CHECK:       exit:
252; CHECK-NEXT:    ret void
253;
254; CHECK-VF8-LABEL: @main_vf_vscale_x_2(
255; CHECK-VF8-NEXT:  iter.check:
256; CHECK-VF8-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
257; CHECK-VF8:       vector.main.loop.iter.check:
258; CHECK-VF8-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
259; CHECK-VF8-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
260; CHECK-VF8-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
261; CHECK-VF8-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
262; CHECK-VF8:       vector.ph:
263; CHECK-VF8-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
264; CHECK-VF8-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
265; CHECK-VF8-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
266; CHECK-VF8-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
267; CHECK-VF8-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
268; CHECK-VF8-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
269; CHECK-VF8-NEXT:    br label [[VECTOR_BODY:%.*]]
270; CHECK-VF8:       vector.body:
271; CHECK-VF8-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
272; CHECK-VF8-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
273; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]]
274; CHECK-VF8-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0
275; CHECK-VF8-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
276; CHECK-VF8-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
277; CHECK-VF8-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP16]]
278; CHECK-VF8-NEXT:    store <vscale x 2 x i64> splat (i64 1), ptr [[TMP14]], align 1
279; CHECK-VF8-NEXT:    store <vscale x 2 x i64> splat (i64 1), ptr [[TMP17]], align 1
280; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
281; CHECK-VF8-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
282; CHECK-VF8-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
283; CHECK-VF8:       middle.block:
284; CHECK-VF8-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
285; CHECK-VF8-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
286; CHECK-VF8:       vec.epilog.iter.check:
287; CHECK-VF8-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 1024, [[N_VEC]]
288; CHECK-VF8-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
289; CHECK-VF8-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
290; CHECK-VF8:       vec.epilog.ph:
291; CHECK-VF8-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
292; CHECK-VF8-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
293; CHECK-VF8:       vec.epilog.vector.body:
294; CHECK-VF8-NEXT:    [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
295; CHECK-VF8-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX1]], 0
296; CHECK-VF8-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP19]]
297; CHECK-VF8-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[TMP20]], i32 0
298; CHECK-VF8-NEXT:    store <8 x i64> splat (i64 1), ptr [[TMP21]], align 1
299; CHECK-VF8-NEXT:    [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8
300; CHECK-VF8-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024
301; CHECK-VF8-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
302; CHECK-VF8:       vec.epilog.middle.block:
303; CHECK-VF8-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
304; CHECK-VF8:       vec.epilog.scalar.ph:
305; CHECK-VF8-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
306; CHECK-VF8-NEXT:    br label [[FOR_BODY:%.*]]
307; CHECK-VF8:       for.body:
308; CHECK-VF8-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
309; CHECK-VF8-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
310; CHECK-VF8-NEXT:    store i64 1, ptr [[ARRAYIDX]], align 1
311; CHECK-VF8-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
312; CHECK-VF8-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024
313; CHECK-VF8-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
314; CHECK-VF8:       exit:
315; CHECK-VF8-NEXT:    ret void
316;
; Scalar input loop handed to the vectorizer.
317entry:
318  br label %for.body
319
320for.body:
321  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
322  %arrayidx = getelementptr inbounds i64, ptr %A, i64 %iv
323  store i64 1, ptr %arrayidx, align 1
324  %iv.next = add nuw nsw i64 %iv, 1
; The exit test compares the incremented IV against 1024, so the body runs
; exactly 1024 times.
325  %exitcond = icmp ne i64 %iv.next, 1024
326  br i1 %exitcond, label %for.body, label %exit
327
328exit:
329  ret void
330}
331
332; FIXME: The epilogue loop is currently miscompiled: the pointer induction
333; uses an incorrect resume value.
; Loop with two inductions: an integer IV counting from 0 and a pointer IV that
; starts at %start and advances by one byte per iteration. Each iteration
; stores an i8 zero through the pointer IV; the loop exits once the incremented
; integer IV equals 10000.
; NOTE(review): in the assertions below, the scalar loop's pointer resume value
; is START + N_VEC when entered from vec.epilog.iter.check, and START + N_VEC3
; (default invocation) or START + 10000 (forced-VF invocation) when entered
; from vec.epilog.middle.block. These mirror the integer IV resume values, so
; please confirm whether the FIXME above is still accurate.
; The assertions inside the function are autogenerated (see the note at the
; top of the file); regenerate them with utils/update_test_checks.py rather
; than editing them by hand.
334define void @test_pr57912_pointer_induction(ptr %start) #0 {
335; CHECK-LABEL: @test_pr57912_pointer_induction(
336; CHECK-NEXT:  iter.check:
337; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
338; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
339; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]]
340; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
341; CHECK:       vector.main.loop.iter.check:
342; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
343; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 32
344; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 10000, [[TMP3]]
345; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
346; CHECK:       vector.ph:
347; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
348; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 32
349; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP5]]
350; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
351; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
352; CHECK-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 32
353; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
354; CHECK:       vector.body:
355; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
356; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
357; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP8]]
358; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[TMP14]], i32 0
359; CHECK-NEXT:    [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
360; CHECK-NEXT:    [[TMP18:%.*]] = mul i64 [[TMP17]], 16
361; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[TMP14]], i64 [[TMP18]]
362; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP16]], align 1
363; CHECK-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP19]], align 1
364; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]]
365; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
366; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
367; CHECK:       middle.block:
368; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
369; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
370; CHECK:       vec.epilog.iter.check:
371; CHECK-NEXT:    [[IND_END4:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
372; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 10000, [[N_VEC]]
373; CHECK-NEXT:    [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
374; CHECK-NEXT:    [[TMP22:%.*]] = mul i64 [[TMP21]], 8
375; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP22]]
376; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
377; CHECK:       vec.epilog.ph:
378; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
379; CHECK-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
380; CHECK-NEXT:    [[TMP24:%.*]] = mul i64 [[TMP23]], 8
381; CHECK-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 10000, [[TMP24]]
382; CHECK-NEXT:    [[N_VEC3:%.*]] = sub i64 10000, [[N_MOD_VF2]]
383; CHECK-NEXT:    [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
384; CHECK-NEXT:    [[TMP26:%.*]] = mul i64 [[TMP25]], 8
385; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC3]]
386; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
387; CHECK:       vec.epilog.vector.body:
388; CHECK-NEXT:    [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
389; CHECK-NEXT:    [[TMP27:%.*]] = add i64 [[INDEX7]], 0
390; CHECK-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP27]]
391; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[TMP28]], i32 0
392; CHECK-NEXT:    store <vscale x 8 x i8> zeroinitializer, ptr [[TMP29]], align 1
393; CHECK-NEXT:    [[INDEX_NEXT8]] = add nuw i64 [[INDEX7]], [[TMP26]]
394; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]]
395; CHECK-NEXT:    br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
396; CHECK:       vec.epilog.middle.block:
397; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 10000, [[N_VEC3]]
398; CHECK-NEXT:    br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
399; CHECK:       vec.epilog.scalar.ph:
400; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
401; CHECK-NEXT:    [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK]] ], [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ]
402; CHECK-NEXT:    br label [[LOOP:%.*]]
403; CHECK:       loop:
404; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
405; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
406; CHECK-NEXT:    store i8 0, ptr [[PTR_IV]], align 1
407; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
408; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
409; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 10000
410; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
411; CHECK:       exit:
412; CHECK-NEXT:    ret void
413;
414; CHECK-VF8-LABEL: @test_pr57912_pointer_induction(
415; CHECK-VF8-NEXT:  iter.check:
416; CHECK-VF8-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
417; CHECK-VF8:       vector.main.loop.iter.check:
418; CHECK-VF8-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
419; CHECK-VF8-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 32
420; CHECK-VF8-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]]
421; CHECK-VF8-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
422; CHECK-VF8:       vector.ph:
423; CHECK-VF8-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
424; CHECK-VF8-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 32
425; CHECK-VF8-NEXT:    [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]]
426; CHECK-VF8-NEXT:    [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]]
427; CHECK-VF8-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
428; CHECK-VF8-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 32
429; CHECK-VF8-NEXT:    br label [[VECTOR_BODY:%.*]]
430; CHECK-VF8:       vector.body:
431; CHECK-VF8-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
432; CHECK-VF8-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
433; CHECK-VF8-NEXT:    [[TMP12:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP6]]
434; CHECK-VF8-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0
435; CHECK-VF8-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
436; CHECK-VF8-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 16
437; CHECK-VF8-NEXT:    [[TMP17:%.*]] = getelementptr i8, ptr [[TMP12]], i64 [[TMP16]]
438; CHECK-VF8-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP14]], align 1
439; CHECK-VF8-NEXT:    store <vscale x 16 x i8> zeroinitializer, ptr [[TMP17]], align 1
440; CHECK-VF8-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
441; CHECK-VF8-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
442; CHECK-VF8-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
443; CHECK-VF8:       middle.block:
444; CHECK-VF8-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]]
445; CHECK-VF8-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
446; CHECK-VF8:       vec.epilog.iter.check:
447; CHECK-VF8-NEXT:    [[IND_END1:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
448; CHECK-VF8-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 10000, [[N_VEC]]
449; CHECK-VF8-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8
450; CHECK-VF8-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
451; CHECK-VF8:       vec.epilog.ph:
452; CHECK-VF8-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
453; CHECK-VF8-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 10000
454; CHECK-VF8-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
455; CHECK-VF8:       vec.epilog.vector.body:
456; CHECK-VF8-NEXT:    [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
457; CHECK-VF8-NEXT:    [[TMP19:%.*]] = add i64 [[INDEX3]], 0
458; CHECK-VF8-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP19]]
459; CHECK-VF8-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i32 0
460; CHECK-VF8-NEXT:    store <8 x i8> zeroinitializer, ptr [[TMP21]], align 1
461; CHECK-VF8-NEXT:    [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 8
462; CHECK-VF8-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 10000
463; CHECK-VF8-NEXT:    br i1 [[TMP22]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
464; CHECK-VF8:       vec.epilog.middle.block:
465; CHECK-VF8-NEXT:    br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
466; CHECK-VF8:       vec.epilog.scalar.ph:
467; CHECK-VF8-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
468; CHECK-VF8-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[START]], [[ITER_CHECK]] ], [ [[IND_END1]], [[VEC_EPILOG_ITER_CHECK]] ]
469; CHECK-VF8-NEXT:    br label [[LOOP:%.*]]
470; CHECK-VF8:       loop:
471; CHECK-VF8-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
472; CHECK-VF8-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
473; CHECK-VF8-NEXT:    store i8 0, ptr [[PTR_IV]], align 1
474; CHECK-VF8-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 1
475; CHECK-VF8-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
476; CHECK-VF8-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 10000
477; CHECK-VF8-NEXT:    br i1 [[CMP]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]]
478; CHECK-VF8:       exit:
479; CHECK-VF8-NEXT:    ret void
480;
; Scalar input loop handed to the vectorizer.
481entry:
482  br label %loop
483
484loop:
485  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; The pointer IV is the only address used by the store; the integer IV only
; drives the exit test.
486  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ]
487  store i8 0, ptr %ptr.iv, align 1
488  %ptr.iv.next = getelementptr inbounds i8, ptr %ptr.iv, i64 1
489  %iv.next = add nuw nsw i64 %iv, 1
; Exit when the incremented IV reaches 10000, i.e. after 10000 iterations.
490  %cmp = icmp eq i64 %iv.next, 10000
491  br i1 %cmp, label %exit, label %loop
492
493exit:
494  ret void
495}
496
; Attribute group #0, attached to the function definitions above, enables the
; SVE target feature.
497attributes #0 = { "target-features"="+sve" }
498