xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll (revision 29441e4f5fa5f5c7709f7cf180815ba97f611297)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; REQUIRES: asserts
3; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -mattr=+sve 2>%t | FileCheck %s --check-prefixes=CHECK,CHECK-VS1
4; RUN: cat %t | FileCheck %s --check-prefixes=DEBUG,DEBUG-VS1
5; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -mcpu=neoverse-v1 -sve-tail-folding=disabled 2>%t | FileCheck %s --check-prefixes=CHECK,CHECK-VS2
6; RUN: cat %t | FileCheck %s --check-prefixes=DEBUG,DEBUG-VS2
7
; Every function in this file is compiled for AArch64 Linux; the RUN lines
; above add the CPU features (-mattr=+sve or -mcpu=neoverse-v1).
8target triple = "aarch64-unknown-linux-gnu"
9
10; DEBUG-LABEL: LV: Checking a loop in 'low_vf_ic_is_better'
11; DEBUG: LV: Found trip count: 0
12; DEBUG: LV: Found maximum trip count: 19
13; DEBUG: LV: IC is 1
14; DEBUG-VS1: LV: VF is vscale x 16
15; DEBUG-VS1: Main Loop VF:vscale x 16, Main Loop UF:1, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1
16; DEBUG-VS2: LV: VF is vscale x 8
17; DEBUG-VS2: Main Loop VF:vscale x 8, Main Loop UF:1, Epilogue Loop VF:vscale x 4, Epilogue Loop UF:1
18
19; DEBUG-LABEL: LV: Checking a loop in 'trip_count_too_small'
20; DEBUG: LV: Found a loop with a very small trip count. This loop is worth vectorizing only if no scalar iteration overheads are incurred.
21; DEBUG: LV: Not vectorizing: The trip count is below the minial threshold value..
22
23; DEBUG-LABEL: LV: Checking a loop in 'too_many_runtime_checks'
24; DEBUG: LV: Found trip count: 0
25; DEBUG: LV: Found maximum trip count: 16
26; DEBUG: LV: Clamping the MaxVF to maximum power of two not exceeding the constant trip count: 16
27; DEBUG: LV: IC is 1
28; DEBUG: LV: VF is 16
29; DEBUG: LV: Vectorization is not beneficial: expected trip count < minimum profitable VF (16 < 32)
30; DEBUG: LV: Too many memory checks needed.
31
32; DEBUG-LABEL: LV: Checking a loop in 'overflow_indvar_known_false'
33; DEBUG: LV: Found trip count: 0
34; DEBUG: LV: Found maximum trip count: 1027
35; DEBUG: LV: can fold tail by masking.
36; DEBUG: Executing best plan with VF=vscale x 16, UF=1
37
; Byte loop over %p+4 that starts at index %tc and stops once the low 32 bits
; of the incremented index equal 19; the entry guard only enters the loop when
; %tc < 19 (unsigned). Each visited byte has the i8 truncation of %val added
; to it. The expectations below cover both RUN configurations and each show a
; main vector loop, a vector epilogue loop at half the main loop's element
; count, and the remaining scalar loop.
38define void @low_vf_ic_is_better(ptr nocapture noundef %p, i32 %tc, i16 noundef %val) {
39; CHECK-VS1-LABEL: define void @low_vf_ic_is_better(
40; CHECK-VS1-SAME: ptr noundef captures(none) [[P:%.*]], i32 [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
41; CHECK-VS1-NEXT:  [[ENTRY:.*:]]
42; CHECK-VS1-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TC]], 19
43; CHECK-VS1-NEXT:    br i1 [[CMP7]], label %[[ITER_CHECK:.*]], label %[[WHILE_END:.*]]
44; CHECK-VS1:       [[ITER_CHECK]]:
45; CHECK-VS1-NEXT:    [[CONV:%.*]] = trunc i16 [[VAL]] to i8
46; CHECK-VS1-NEXT:    [[V:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
47; CHECK-VS1-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[TC]] to i64
48; CHECK-VS1-NEXT:    [[TMP1:%.*]] = add i32 [[TC]], 1
49; CHECK-VS1-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
50; CHECK-VS1-NEXT:    [[TMP3:%.*]] = sub i64 20, [[TMP2]]
51; CHECK-VS1-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
52; CHECK-VS1-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 8
53; CHECK-VS1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP5]]
54; CHECK-VS1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
55; CHECK-VS1:       [[VECTOR_SCEVCHECK]]:
56; CHECK-VS1-NEXT:    [[TMP6:%.*]] = add i32 [[TC]], 1
57; CHECK-VS1-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
58; CHECK-VS1-NEXT:    [[TMP8:%.*]] = sub i64 19, [[TMP7]]
59; CHECK-VS1-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
60; CHECK-VS1-NEXT:    [[TMP10:%.*]] = add i32 [[TMP6]], [[TMP9]]
61; CHECK-VS1-NEXT:    [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP6]]
62; CHECK-VS1-NEXT:    [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 4294967295
63; CHECK-VS1-NEXT:    [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
64; CHECK-VS1-NEXT:    br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
65; CHECK-VS1:       [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
66; CHECK-VS1-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
67; CHECK-VS1-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 16
68; CHECK-VS1-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], [[TMP15]]
69; CHECK-VS1-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
70; CHECK-VS1:       [[VECTOR_PH]]:
71; CHECK-VS1-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
72; CHECK-VS1-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 16
73; CHECK-VS1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]]
74; CHECK-VS1-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
75; CHECK-VS1-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
76; CHECK-VS1-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 16
77; CHECK-VS1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
78; CHECK-VS1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
79; CHECK-VS1-NEXT:    br label %[[VECTOR_BODY:.*]]
80; CHECK-VS1:       [[VECTOR_BODY]]:
81; CHECK-VS1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
82; CHECK-VS1-NEXT:    [[TMP20:%.*]] = add i64 [[TMP0]], [[INDEX]]
83; CHECK-VS1-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 0
84; CHECK-VS1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP21]]
85; CHECK-VS1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i32 0
86; CHECK-VS1-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP23]], align 1
87; CHECK-VS1-NEXT:    [[TMP24:%.*]] = add <vscale x 16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
88; CHECK-VS1-NEXT:    store <vscale x 16 x i8> [[TMP24]], ptr [[TMP23]], align 1
89; CHECK-VS1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
90; CHECK-VS1-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
91; CHECK-VS1-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
92; CHECK-VS1:       [[MIDDLE_BLOCK]]:
93; CHECK-VS1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
94; CHECK-VS1-NEXT:    br i1 [[CMP_N]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
95; CHECK-VS1:       [[VEC_EPILOG_ITER_CHECK]]:
96; CHECK-VS1-NEXT:    [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]]
97; CHECK-VS1-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
98; CHECK-VS1-NEXT:    [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
99; CHECK-VS1-NEXT:    [[TMP27:%.*]] = mul i64 [[TMP26]], 8
100; CHECK-VS1-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP27]]
101; CHECK-VS1-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
102; CHECK-VS1:       [[VEC_EPILOG_PH]]:
103; CHECK-VS1-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
104; CHECK-VS1-NEXT:    [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
105; CHECK-VS1-NEXT:    [[TMP29:%.*]] = mul i64 [[TMP28]], 8
106; CHECK-VS1-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], [[TMP29]]
107; CHECK-VS1-NEXT:    [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]]
108; CHECK-VS1-NEXT:    [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
109; CHECK-VS1-NEXT:    [[TMP31:%.*]] = mul i64 [[TMP30]], 8
110; CHECK-VS1-NEXT:    [[TMP39:%.*]] = add i64 [[TMP0]], [[N_VEC3]]
111; CHECK-VS1-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0
112; CHECK-VS1-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i8> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
113; CHECK-VS1-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
114; CHECK-VS1:       [[VEC_EPILOG_VECTOR_BODY]]:
115; CHECK-VS1-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
116; CHECK-VS1-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX5]]
117; CHECK-VS1-NEXT:    [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 0
118; CHECK-VS1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP32]]
119; CHECK-VS1-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i32 0
120; CHECK-VS1-NEXT:    [[WIDE_LOAD6:%.*]] = load <vscale x 8 x i8>, ptr [[TMP34]], align 1
121; CHECK-VS1-NEXT:    [[TMP35:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]]
122; CHECK-VS1-NEXT:    store <vscale x 8 x i8> [[TMP35]], ptr [[TMP34]], align 1
123; CHECK-VS1-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX5]], [[TMP31]]
124; CHECK-VS1-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
125; CHECK-VS1-NEXT:    br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
126; CHECK-VS1:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
127; CHECK-VS1-NEXT:    [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]]
128; CHECK-VS1-NEXT:    br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
129; CHECK-VS1:       [[VEC_EPILOG_SCALAR_PH]]:
130; CHECK-VS1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ]
131; CHECK-VS1-NEXT:    br label %[[WHILE_BODY:.*]]
132; CHECK-VS1:       [[WHILE_BODY]]:
133; CHECK-VS1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ]
134; CHECK-VS1-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
135; CHECK-VS1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[IV]]
136; CHECK-VS1-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
137; CHECK-VS1-NEXT:    [[ADD:%.*]] = add i8 [[TMP37]], [[CONV]]
138; CHECK-VS1-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
139; CHECK-VS1-NEXT:    [[TMP38:%.*]] = and i64 [[IV_NEXT]], 4294967295
140; CHECK-VS1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP38]], 19
141; CHECK-VS1-NEXT:    br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT]], label %[[WHILE_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
142; CHECK-VS1:       [[WHILE_END_LOOPEXIT]]:
143; CHECK-VS1-NEXT:    br label %[[WHILE_END]]
144; CHECK-VS1:       [[WHILE_END]]:
145; CHECK-VS1-NEXT:    ret void
146;
147; CHECK-VS2-LABEL: define void @low_vf_ic_is_better(
148; CHECK-VS2-SAME: ptr noundef captures(none) [[P:%.*]], i32 [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
149; CHECK-VS2-NEXT:  [[ENTRY:.*:]]
150; CHECK-VS2-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TC]], 19
151; CHECK-VS2-NEXT:    br i1 [[CMP7]], label %[[ITER_CHECK:.*]], label %[[WHILE_END:.*]]
152; CHECK-VS2:       [[ITER_CHECK]]:
153; CHECK-VS2-NEXT:    [[CONV:%.*]] = trunc i16 [[VAL]] to i8
154; CHECK-VS2-NEXT:    [[V:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
155; CHECK-VS2-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[TC]] to i64
156; CHECK-VS2-NEXT:    [[TMP1:%.*]] = add i32 [[TC]], 1
157; CHECK-VS2-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
158; CHECK-VS2-NEXT:    [[TMP3:%.*]] = sub i64 20, [[TMP2]]
159; CHECK-VS2-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
160; CHECK-VS2-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
161; CHECK-VS2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP5]]
162; CHECK-VS2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
163; CHECK-VS2:       [[VECTOR_SCEVCHECK]]:
164; CHECK-VS2-NEXT:    [[TMP6:%.*]] = add i32 [[TC]], 1
165; CHECK-VS2-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
166; CHECK-VS2-NEXT:    [[TMP8:%.*]] = sub i64 19, [[TMP7]]
167; CHECK-VS2-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
168; CHECK-VS2-NEXT:    [[TMP10:%.*]] = add i32 [[TMP6]], [[TMP9]]
169; CHECK-VS2-NEXT:    [[TMP11:%.*]] = icmp ult i32 [[TMP10]], [[TMP6]]
170; CHECK-VS2-NEXT:    [[TMP12:%.*]] = icmp ugt i64 [[TMP8]], 4294967295
171; CHECK-VS2-NEXT:    [[TMP13:%.*]] = or i1 [[TMP11]], [[TMP12]]
172; CHECK-VS2-NEXT:    br i1 [[TMP13]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
173; CHECK-VS2:       [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
174; CHECK-VS2-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
175; CHECK-VS2-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 8
176; CHECK-VS2-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP3]], [[TMP15]]
177; CHECK-VS2-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
178; CHECK-VS2:       [[VECTOR_PH]]:
179; CHECK-VS2-NEXT:    [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
180; CHECK-VS2-NEXT:    [[TMP17:%.*]] = mul i64 [[TMP16]], 8
181; CHECK-VS2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP17]]
182; CHECK-VS2-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
183; CHECK-VS2-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
184; CHECK-VS2-NEXT:    [[TMP19:%.*]] = mul i64 [[TMP18]], 8
185; CHECK-VS2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i8> poison, i8 [[CONV]], i64 0
186; CHECK-VS2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
187; CHECK-VS2-NEXT:    br label %[[VECTOR_BODY:.*]]
188; CHECK-VS2:       [[VECTOR_BODY]]:
189; CHECK-VS2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
190; CHECK-VS2-NEXT:    [[TMP20:%.*]] = add i64 [[TMP0]], [[INDEX]]
191; CHECK-VS2-NEXT:    [[TMP21:%.*]] = add i64 [[TMP20]], 0
192; CHECK-VS2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP21]]
193; CHECK-VS2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP22]], i32 0
194; CHECK-VS2-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP23]], align 1
195; CHECK-VS2-NEXT:    [[TMP24:%.*]] = add <vscale x 8 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
196; CHECK-VS2-NEXT:    store <vscale x 8 x i8> [[TMP24]], ptr [[TMP23]], align 1
197; CHECK-VS2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]]
198; CHECK-VS2-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
199; CHECK-VS2-NEXT:    br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
200; CHECK-VS2:       [[MIDDLE_BLOCK]]:
201; CHECK-VS2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
202; CHECK-VS2-NEXT:    br i1 [[CMP_N]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
203; CHECK-VS2:       [[VEC_EPILOG_ITER_CHECK]]:
204; CHECK-VS2-NEXT:    [[IND_END4:%.*]] = add i64 [[TMP0]], [[N_VEC]]
205; CHECK-VS2-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP3]], [[N_VEC]]
206; CHECK-VS2-NEXT:    [[TMP26:%.*]] = call i64 @llvm.vscale.i64()
207; CHECK-VS2-NEXT:    [[TMP27:%.*]] = mul i64 [[TMP26]], 4
208; CHECK-VS2-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], [[TMP27]]
209; CHECK-VS2-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
210; CHECK-VS2:       [[VEC_EPILOG_PH]]:
211; CHECK-VS2-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
212; CHECK-VS2-NEXT:    [[TMP28:%.*]] = call i64 @llvm.vscale.i64()
213; CHECK-VS2-NEXT:    [[TMP29:%.*]] = mul i64 [[TMP28]], 4
214; CHECK-VS2-NEXT:    [[N_MOD_VF2:%.*]] = urem i64 [[TMP3]], [[TMP29]]
215; CHECK-VS2-NEXT:    [[N_VEC3:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF2]]
216; CHECK-VS2-NEXT:    [[TMP30:%.*]] = call i64 @llvm.vscale.i64()
217; CHECK-VS2-NEXT:    [[TMP31:%.*]] = mul i64 [[TMP30]], 4
218; CHECK-VS2-NEXT:    [[TMP39:%.*]] = add i64 [[TMP0]], [[N_VEC3]]
219; CHECK-VS2-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[CONV]], i64 0
220; CHECK-VS2-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT7]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
221; CHECK-VS2-NEXT:    br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
222; CHECK-VS2:       [[VEC_EPILOG_VECTOR_BODY]]:
223; CHECK-VS2-NEXT:    [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
224; CHECK-VS2-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX5]]
225; CHECK-VS2-NEXT:    [[TMP32:%.*]] = add i64 [[OFFSET_IDX]], 0
226; CHECK-VS2-NEXT:    [[TMP33:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP32]]
227; CHECK-VS2-NEXT:    [[TMP34:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP33]], i32 0
228; CHECK-VS2-NEXT:    [[WIDE_LOAD6:%.*]] = load <vscale x 4 x i8>, ptr [[TMP34]], align 1
229; CHECK-VS2-NEXT:    [[TMP35:%.*]] = add <vscale x 4 x i8> [[WIDE_LOAD6]], [[BROADCAST_SPLAT8]]
230; CHECK-VS2-NEXT:    store <vscale x 4 x i8> [[TMP35]], ptr [[TMP34]], align 1
231; CHECK-VS2-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX5]], [[TMP31]]
232; CHECK-VS2-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]]
233; CHECK-VS2-NEXT:    br i1 [[TMP36]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
234; CHECK-VS2:       [[VEC_EPILOG_MIDDLE_BLOCK]]:
235; CHECK-VS2-NEXT:    [[CMP_N10:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC3]]
236; CHECK-VS2-NEXT:    br i1 [[CMP_N10]], label %[[WHILE_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
237; CHECK-VS2:       [[VEC_EPILOG_SCALAR_PH]]:
238; CHECK-VS2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP39]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[ITER_CHECK]] ], [ [[IND_END4]], %[[VEC_EPILOG_ITER_CHECK]] ]
239; CHECK-VS2-NEXT:    br label %[[WHILE_BODY:.*]]
240; CHECK-VS2:       [[WHILE_BODY]]:
241; CHECK-VS2-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[WHILE_BODY]] ]
242; CHECK-VS2-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
243; CHECK-VS2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[IV]]
244; CHECK-VS2-NEXT:    [[TMP37:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
245; CHECK-VS2-NEXT:    [[ADD:%.*]] = add i8 [[TMP37]], [[CONV]]
246; CHECK-VS2-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
247; CHECK-VS2-NEXT:    [[TMP38:%.*]] = and i64 [[IV_NEXT]], 4294967295
248; CHECK-VS2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP38]], 19
249; CHECK-VS2-NEXT:    br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT]], label %[[WHILE_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
250; CHECK-VS2:       [[WHILE_END_LOOPEXIT]]:
251; CHECK-VS2-NEXT:    br label %[[WHILE_END]]
252; CHECK-VS2:       [[WHILE_END]]:
253; CHECK-VS2-NEXT:    ret void
254;
255entry:
  ; Skip the loop entirely unless %tc is below the loop bound (unsigned).
256  %cmp7 = icmp ult i32 %tc, 19
257  br i1 %cmp7, label %while.preheader, label %while.end
258
259while.preheader:
260  %conv = trunc i16 %val to i8
261  %v = getelementptr inbounds nuw i8, ptr %p, i64 4
  ; The i64 induction variable starts at %tc, not at zero.
262  %0 = zext nneg i32 %tc to i64
263  br label %while.body
264
265while.body:
266  %iv = phi i64 [ %0, %while.preheader ], [ %iv.next, %while.body ]
267  %iv.next = add nuw nsw i64 %iv, 1
268  %arrayidx = getelementptr inbounds nuw i8, ptr %v, i64 %iv
269  %1 = load i8, ptr %arrayidx, align 1
270  %add = add i8 %1, %conv
271  store i8 %add, ptr %arrayidx, align 1
  ; Only the low 32 bits of the incremented index take part in the exit test.
272  %2 = and i64 %iv.next, 4294967295
273  %exitcond.not = icmp eq i64 %2, 19
274  br i1 %exitcond.not, label %while.end, label %while.body
275
276while.end:
277  ret void
278}
279
; Byte loop over %p+4 that starts at index %tc and exits once the low 32 bits
; of the incremented index equal 3; the entry guard requires %tc < 3
; (unsigned). Each visited byte has the i8 truncation of %val added to it.
; The expected output below contains only the scalar loop: no vector blocks
; appear for either RUN configuration.
280define void @trip_count_too_small(ptr nocapture noundef %p, i32 noundef %tc, i16 noundef %val) {
281; CHECK-LABEL: define void @trip_count_too_small(
282; CHECK-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
283; CHECK-NEXT:  [[ENTRY:.*:]]
284; CHECK-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TC]], 3
285; CHECK-NEXT:    br i1 [[CMP7]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]]
286; CHECK:       [[WHILE_PREHEADER]]:
287; CHECK-NEXT:    [[CONV:%.*]] = trunc i16 [[VAL]] to i8
288; CHECK-NEXT:    [[V:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
289; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[TC]] to i64
290; CHECK-NEXT:    br label %[[WHILE_BODY:.*]]
291; CHECK:       [[WHILE_BODY]]:
292; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], %[[WHILE_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ]
293; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
294; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]]
295; CHECK-NEXT:    [[TMP43:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
296; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP43]], [[CONV]]
297; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
298; CHECK-NEXT:    [[TMP44:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
299; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP44]], 3
300; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY]]
301; CHECK:       [[WHILE_END_LOOPEXIT]]:
302; CHECK-NEXT:    br label %[[WHILE_END]]
303; CHECK:       [[WHILE_END]]:
304; CHECK-NEXT:    ret void
305;
306entry:
  ; Skip the loop entirely unless %tc is below the loop bound (unsigned).
307  %cmp7 = icmp ult i32 %tc, 3
308  br i1 %cmp7, label %while.preheader, label %while.end
309
310while.preheader:
311  %conv = trunc i16 %val to i8
312  %v = getelementptr inbounds nuw i8, ptr %p, i64 4
  ; The i64 induction variable starts at %tc, not at zero.
313  %0 = zext nneg i32 %tc to i64
314  br label %while.body
315
316while.body:
317  %iv = phi i64 [ %0, %while.preheader ], [ %iv.next, %while.body ]
318  %iv.next = add nuw nsw i64 %iv, 1
319  %arrayidx = getelementptr inbounds nuw i8, ptr %v, i64 %iv
320  %1 = load i8, ptr %arrayidx, align 1
321  %add = add i8 %1, %conv
322  store i8 %add, ptr %arrayidx, align 1
  ; Only the low 32 bits of the incremented index take part in the exit test.
323  %2 = and i64 %iv.next, 4294967295
324  %exitcond.not = icmp eq i64 %2, 3
325  br i1 %exitcond.not, label %while.end, label %while.body
326
327while.end:
328  ret void
329}
330
; Loop over four pointer arguments, none of which is marked noalias. Each
; iteration loads p2[iv] and p3[iv], multiplies them, adds the product to
; p1[iv] and stores it back, and separately adds the i8 truncation of %val to
; the byte at (%p+4)[iv]. The index starts at %tc (entry guard: %tc < 16,
; unsigned) and the loop exits once the low 32 bits of the incremented index
; equal 16. The expected output below contains only the scalar loop.
331define void @too_many_runtime_checks(ptr nocapture noundef %p, ptr nocapture noundef %p1, ptr nocapture noundef readonly %p2, ptr nocapture noundef readonly %p3, i32 noundef %tc, i16 noundef %val) {
332; CHECK-LABEL: define void @too_many_runtime_checks(
333; CHECK-SAME: ptr noundef captures(none) [[P:%.*]], ptr noundef captures(none) [[P1:%.*]], ptr noundef readonly captures(none) [[P2:%.*]], ptr noundef readonly captures(none) [[P3:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR0]] {
334; CHECK-NEXT:  [[ENTRY:.*:]]
335; CHECK-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[TC]], 16
336; CHECK-NEXT:    br i1 [[CMP20]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]]
337; CHECK:       [[WHILE_PREHEADER]]:
338; CHECK-NEXT:    [[CONV8:%.*]] = trunc i16 [[VAL]] to i8
339; CHECK-NEXT:    [[V:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
340; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i32 [[TC]] to i64
341; CHECK-NEXT:    br label %[[WHILE_BODY:.*]]
342; CHECK:       [[WHILE_BODY]]:
343; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[TMP1]], %[[WHILE_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ]
344; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[P2]], i64 [[INDVARS_IV]]
345; CHECK-NEXT:    [[TMP60:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
346; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i8, ptr [[P3]], i64 [[INDVARS_IV]]
347; CHECK-NEXT:    [[TMP61:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
348; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[TMP61]], [[TMP60]]
349; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], i64 [[INDVARS_IV]]
350; CHECK-NEXT:    [[TMP62:%.*]] = load i8, ptr [[ARRAYIDX5]], align 1
351; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[MUL]], [[TMP62]]
352; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX5]], align 1
353; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]]
354; CHECK-NEXT:    [[TMP63:%.*]] = load i8, ptr [[ARRAYIDX10]], align 1
355; CHECK-NEXT:    [[ADD12:%.*]] = add i8 [[TMP63]], [[CONV8]]
356; CHECK-NEXT:    store i8 [[ADD12]], ptr [[ARRAYIDX10]], align 1
357; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
358; CHECK-NEXT:    [[TMP64:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
359; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP64]], 16
360; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT:.*]], label %[[WHILE_BODY]]
361; CHECK:       [[WHILE_END_LOOPEXIT]]:
362; CHECK-NEXT:    br label %[[WHILE_END]]
363; CHECK:       [[WHILE_END]]:
364; CHECK-NEXT:    ret void
365;
366entry:
  ; Skip the loop entirely unless %tc is below the loop bound (unsigned).
367  %cmp20 = icmp ult i32 %tc, 16
368  br i1 %cmp20, label %while.preheader, label %while.end
369
370while.preheader:
371  %0 = trunc i16 %val to i8
372  %v = getelementptr inbounds nuw i8, ptr %p, i64 4
  ; The i64 induction variable starts at %tc, not at zero.
373  %1 = zext nneg i32 %tc to i64
374  br label %while.body
375
376while.body:
377  %iv = phi i64 [ %1, %while.preheader ], [ %iv.next, %while.body ]
  ; First update: p1[iv] += p2[iv] * p3[iv] (all i8).
378  %arrayidx = getelementptr inbounds nuw i8, ptr %p2, i64 %iv
379  %2 = load i8, ptr %arrayidx, align 1
380  %arrayidx2 = getelementptr inbounds nuw i8, ptr %p3, i64 %iv
381  %3 = load i8, ptr %arrayidx2, align 1
382  %mul = mul i8 %3, %2
383  %arrayidx5 = getelementptr inbounds nuw i8, ptr %p1, i64 %iv
384  %4 = load i8, ptr %arrayidx5, align 1
385  %add = add i8 %mul, %4
386  store i8 %add, ptr %arrayidx5, align 1
  ; Second update: (%p+4)[iv] += trunc(%val).
387  %arrayidx10 = getelementptr inbounds nuw i8, ptr %v, i64 %iv
388  %5 = load i8, ptr %arrayidx10, align 1
389  %add12 = add i8 %5, %0
390  store i8 %add12, ptr %arrayidx10, align 1
391  %iv.next = add nuw nsw i64 %iv, 1
  ; Only the low 32 bits of the incremented index take part in the exit test.
392  %6 = and i64 %iv.next, 4294967295
393  %exitcond.not = icmp eq i64 %6, 16
394  br i1 %exitcond.not, label %while.end, label %while.body
395
396while.end:
397  ret void
398}
399
; Byte loop over %p+4 that starts at index %tc and exits once the low 32 bits
; of the incremented index equal 1027 (entry guard: %tc < 1027, unsigned).
; The function carries vscale_range(1,16) and its latch branch carries loop
; metadata !0, which sets llvm.loop.vectorize.predicate.enable to true.
; The expected output below is a vector loop that uses
; llvm.get.active.lane.mask with masked load/store; the first guard branch in
; the preheader is the constant `br i1 false`, and the middle block branches
; on constant true.
400define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %tc, i16 noundef %val) vscale_range(1,16) {
401; CHECK-LABEL: define void @overflow_indvar_known_false(
402; CHECK-SAME: ptr noundef captures(none) [[P:%.*]], i32 noundef [[TC:%.*]], i16 noundef [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
403; CHECK-NEXT:  [[ENTRY:.*:]]
404; CHECK-NEXT:    [[CMP7:%.*]] = icmp ult i32 [[TC]], 1027
405; CHECK-NEXT:    br i1 [[CMP7]], label %[[WHILE_PREHEADER:.*]], label %[[WHILE_END:.*]]
406; CHECK:       [[WHILE_PREHEADER]]:
407; CHECK-NEXT:    [[CONV:%.*]] = trunc i16 [[VAL]] to i8
408; CHECK-NEXT:    [[V:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 4
409; CHECK-NEXT:    [[TMP0:%.*]] = zext nneg i32 [[TC]] to i64
410; CHECK-NEXT:    [[TMP19:%.*]] = add i32 [[TC]], 1
411; CHECK-NEXT:    [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
412; CHECK-NEXT:    [[TMP1:%.*]] = sub i64 1028, [[TMP20]]
413; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
414; CHECK:       [[VECTOR_SCEVCHECK]]:
415; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TC]], 1
416; CHECK-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP21]] to i64
417; CHECK-NEXT:    [[TMP23:%.*]] = sub i64 1027, [[TMP22]]
418; CHECK-NEXT:    [[TMP24:%.*]] = trunc i64 [[TMP23]] to i32
419; CHECK-NEXT:    [[TMP25:%.*]] = add i32 [[TMP21]], [[TMP24]]
420; CHECK-NEXT:    [[TMP26:%.*]] = icmp ult i32 [[TMP25]], [[TMP21]]
421; CHECK-NEXT:    [[TMP27:%.*]] = icmp ugt i64 [[TMP23]], 4294967295
422; CHECK-NEXT:    [[TMP28:%.*]] = or i1 [[TMP26]], [[TMP27]]
423; CHECK-NEXT:    br i1 [[TMP28]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
424; CHECK:       [[VECTOR_PH]]:
425; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
426; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 16
427; CHECK-NEXT:    [[TMP4:%.*]] = sub i64 [[TMP3]], 1
428; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP1]], [[TMP4]]
429; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP3]]
430; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
431; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
432; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 16
433; CHECK-NEXT:    [[IND_END:%.*]] = add i64 [[TMP0]], [[N_VEC]]
434; CHECK-NEXT:    [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 [[TMP1]])
435; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[CONV]], i64 0
436; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
437; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
438; CHECK:       [[VECTOR_BODY]]:
439; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
440; CHECK-NEXT:    [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
441; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX]]
442; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 0
443; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[TMP12]]
444; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i32 0
445; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
446; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 16 x i8> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
447; CHECK-NEXT:    call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr [[TMP14]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
448; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
449; CHECK-NEXT:    [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 [[TMP1]])
450; CHECK-NEXT:    [[TMP16:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
451; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <vscale x 16 x i1> [[TMP16]], i32 0
452; CHECK-NEXT:    br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
453; CHECK:       [[MIDDLE_BLOCK]]:
454; CHECK-NEXT:    br i1 true, label %[[WHILE_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
455; CHECK:       [[SCALAR_PH]]:
456; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[WHILE_PREHEADER]] ]
457; CHECK-NEXT:    br label %[[WHILE_BODY:.*]]
458; CHECK:       [[WHILE_BODY]]:
459; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ]
460; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
461; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 [[INDVARS_IV]]
462; CHECK-NEXT:    [[TMP18:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
463; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[TMP18]], [[CONV]]
464; CHECK-NEXT:    store i8 [[ADD]], ptr [[ARRAYIDX]], align 1
465; CHECK-NEXT:    [[TMP29:%.*]] = and i64 [[INDVARS_IV_NEXT]], 4294967295
466; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[TMP29]], 1027
467; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[WHILE_END_LOOPEXIT]], label %[[WHILE_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
468; CHECK:       [[WHILE_END_LOOPEXIT]]:
469; CHECK-NEXT:    br label %[[WHILE_END]]
470; CHECK:       [[WHILE_END]]:
471; CHECK-NEXT:    ret void
472;
473entry:
  ; Skip the loop entirely unless %tc is below the loop bound (unsigned).
474  %cmp7 = icmp ult i32 %tc, 1027
475  br i1 %cmp7, label %while.preheader, label %while.end
476
477while.preheader:
478  %conv = trunc i16 %val to i8
479  %v = getelementptr inbounds nuw i8, ptr %p, i64 4
  ; The i64 induction variable starts at %tc, not at zero.
480  %0 = zext nneg i32 %tc to i64
481  br label %while.body
482
483while.body:
484  %iv = phi i64 [ %0, %while.preheader ], [ %iv.next, %while.body ]
485  %iv.next = add nuw nsw i64 %iv, 1
486  %arrayidx = getelementptr inbounds nuw i8, ptr %v, i64 %iv
487  %1 = load i8, ptr %arrayidx, align 1
488  %add = add i8 %1, %conv
489  store i8 %add, ptr %arrayidx, align 1
  ; Only the low 32 bits of the incremented index take part in the exit test.
490  %2 = and i64 %iv.next, 4294967295
491  %exitcond.not = icmp eq i64 %2, 1027
  ; The latch branch carries the !0 loop metadata defined at the end of the file.
492  br i1 %exitcond.not, label %while.end, label %while.body, !llvm.loop !0
493
494while.end:
495  ret void
496}
497
498
; Loop metadata attached to the latch branch of @overflow_indvar_known_false:
; !0 is the self-referential loop ID and !1 sets
; llvm.loop.vectorize.predicate.enable to true for that loop.
499!0 = distinct !{!0, !1}
500!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
501;.
502; CHECK-VS1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
503; CHECK-VS1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
504; CHECK-VS1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
505; CHECK-VS1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
506; CHECK-VS1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
507; CHECK-VS1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
508; CHECK-VS1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
509;.
510; CHECK-VS2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
511; CHECK-VS2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
512; CHECK-VS2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
513; CHECK-VS2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
514; CHECK-VS2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
515; CHECK-VS2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
516; CHECK-VS2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]]}
517;.
518