xref: /llvm-project/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll (revision 0eaa69eb234798774a08743b64a9aa0cf71c5356)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF8UF1 %s
3; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=2 -S %s | FileCheck --check-prefixes=VF8UF2 %s
4; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=16 -force-vector-interleave=1 -S %s | FileCheck --check-prefixes=VF16UF1 %s
5
6target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
7
8; Check if the vector loop condition can be simplified to true for a given
9; VF/IC combination.
10define void @test_tc_less_than_16(ptr %A, i64 %N) {
11; VF8UF1-LABEL: define void @test_tc_less_than_16(
12; VF8UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
13; VF8UF1-NEXT:  [[ENTRY:.*]]:
14; VF8UF1-NEXT:    [[AND:%.*]] = and i64 [[N]], 15
15; VF8UF1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[AND]], 8
16; VF8UF1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
17; VF8UF1:       [[VECTOR_PH]]:
18; VF8UF1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[AND]], 8
19; VF8UF1-NEXT:    [[N_VEC:%.*]] = sub i64 [[AND]], [[N_MOD_VF]]
20; VF8UF1-NEXT:    [[TMP0:%.*]] = sub i64 [[AND]], [[N_VEC]]
21; VF8UF1-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
22; VF8UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
23; VF8UF1:       [[VECTOR_BODY]]:
24; VF8UF1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
25; VF8UF1-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
26; VF8UF1-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]]
27; VF8UF1-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
28; VF8UF1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
29; VF8UF1-NEXT:    [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
30; VF8UF1-NEXT:    store <8 x i8> [[TMP4]], ptr [[TMP3]], align 1
31; VF8UF1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
32; VF8UF1-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
33; VF8UF1-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
34; VF8UF1:       [[MIDDLE_BLOCK]]:
35; VF8UF1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
36; VF8UF1-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
37; VF8UF1:       [[SCALAR_PH]]:
38; VF8UF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[AND]], %[[ENTRY]] ]
39; VF8UF1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
40; VF8UF1-NEXT:    br label %[[LOOP:.*]]
41; VF8UF1:       [[LOOP]]:
42; VF8UF1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
43; VF8UF1-NEXT:    [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
44; VF8UF1-NEXT:    [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
45; VF8UF1-NEXT:    [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
46; VF8UF1-NEXT:    [[ADD:%.*]] = add nsw i8 [[L]], 10
47; VF8UF1-NEXT:    store i8 [[ADD]], ptr [[P_SRC]], align 1
48; VF8UF1-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
49; VF8UF1-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
50; VF8UF1-NEXT:    br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
51; VF8UF1:       [[EXIT]]:
52; VF8UF1-NEXT:    ret void
53;
54; VF8UF2-LABEL: define void @test_tc_less_than_16(
55; VF8UF2-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
56; VF8UF2-NEXT:  [[ENTRY:.*]]:
57; VF8UF2-NEXT:    [[AND:%.*]] = and i64 [[N]], 15
58; VF8UF2-NEXT:    br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
59; VF8UF2:       [[VECTOR_PH]]:
60; VF8UF2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[AND]], 16
61; VF8UF2-NEXT:    [[N_VEC:%.*]] = sub i64 [[AND]], [[N_MOD_VF]]
62; VF8UF2-NEXT:    [[TMP0:%.*]] = sub i64 [[AND]], [[N_VEC]]
63; VF8UF2-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
64; VF8UF2-NEXT:    br label %[[VECTOR_BODY:.*]]
65; VF8UF2:       [[VECTOR_BODY]]:
66; VF8UF2-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 0
67; VF8UF2-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
68; VF8UF2-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
69; VF8UF2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
70; VF8UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
71; VF8UF2-NEXT:    [[TMP4:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
72; VF8UF2-NEXT:    [[TMP5:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
73; VF8UF2-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
74; VF8UF2-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
75; VF8UF2-NEXT:    store <8 x i8> [[TMP4]], ptr [[TMP6]], align 1
76; VF8UF2-NEXT:    store <8 x i8> [[TMP5]], ptr [[TMP7]], align 1
77; VF8UF2-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
78; VF8UF2:       [[MIDDLE_BLOCK]]:
79; VF8UF2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
80; VF8UF2-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
81; VF8UF2:       [[SCALAR_PH]]:
82; VF8UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[AND]], %[[ENTRY]] ]
83; VF8UF2-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
84; VF8UF2-NEXT:    br label %[[LOOP:.*]]
85; VF8UF2:       [[LOOP]]:
86; VF8UF2-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
87; VF8UF2-NEXT:    [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
88; VF8UF2-NEXT:    [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
89; VF8UF2-NEXT:    [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
90; VF8UF2-NEXT:    [[ADD:%.*]] = add nsw i8 [[L]], 10
91; VF8UF2-NEXT:    store i8 [[ADD]], ptr [[P_SRC]], align 1
92; VF8UF2-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
93; VF8UF2-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
94; VF8UF2-NEXT:    br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
95; VF8UF2:       [[EXIT]]:
96; VF8UF2-NEXT:    ret void
97;
98; VF16UF1-LABEL: define void @test_tc_less_than_16(
99; VF16UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
100; VF16UF1-NEXT:  [[ENTRY:.*]]:
101; VF16UF1-NEXT:    [[AND:%.*]] = and i64 [[N]], 15
102; VF16UF1-NEXT:    br i1 true, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
103; VF16UF1:       [[VECTOR_PH]]:
104; VF16UF1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[AND]], 16
105; VF16UF1-NEXT:    [[N_VEC:%.*]] = sub i64 [[AND]], [[N_MOD_VF]]
106; VF16UF1-NEXT:    [[TMP0:%.*]] = sub i64 [[AND]], [[N_VEC]]
107; VF16UF1-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
108; VF16UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
109; VF16UF1:       [[VECTOR_BODY]]:
110; VF16UF1-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 0
111; VF16UF1-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
112; VF16UF1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
113; VF16UF1-NEXT:    [[TMP3:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
114; VF16UF1-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0
115; VF16UF1-NEXT:    store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1
116; VF16UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
117; VF16UF1:       [[MIDDLE_BLOCK]]:
118; VF16UF1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[AND]], [[N_VEC]]
119; VF16UF1-NEXT:    br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
120; VF16UF1:       [[SCALAR_PH]]:
121; VF16UF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[AND]], %[[ENTRY]] ]
122; VF16UF1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
123; VF16UF1-NEXT:    br label %[[LOOP:.*]]
124; VF16UF1:       [[LOOP]]:
125; VF16UF1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
126; VF16UF1-NEXT:    [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
127; VF16UF1-NEXT:    [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
128; VF16UF1-NEXT:    [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
129; VF16UF1-NEXT:    [[ADD:%.*]] = add nsw i8 [[L]], 10
130; VF16UF1-NEXT:    store i8 [[ADD]], ptr [[P_SRC]], align 1
131; VF16UF1-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
132; VF16UF1-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 0
133; VF16UF1-NEXT:    br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
134; VF16UF1:       [[EXIT]]:
135; VF16UF1-NEXT:    ret void
136;
137entry:
  ; Scalar input loop handed to the vectorizer. Masking %N with 15 keeps only
  ; its low four bits, so the countdown below starts from a value of at most 15.
  ; The assertions above expect that, in the runs with VF=8/IC=2 and VF=16/IC=1,
  ; the vector body branches straight to the middle block (no backedge), while
  ; the VF=8/IC=1 run keeps its conditional latch branch.
138  %and = and i64 %N, 15
139  br label %loop
140
141loop:
  ; %iv counts down from %and; %p.src starts at %A and advances one byte per
  ; iteration.
142  %iv = phi i64 [ %and, %entry ], [ %iv.next, %loop ]
143  %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
144  %p.src.next = getelementptr inbounds i8, ptr %p.src, i64 1
  ; Read the current byte, add 10 to it and write it back to the same address.
145  %l = load i8, ptr %p.src, align 1
146  %add = add nsw i8 %l, 10
147  store i8 %add, ptr %p.src
  ; Decrement the counter and leave the loop once it reaches zero.
148  %iv.next = add nsw i64 %iv, -1
149  %cmp = icmp eq i64 %iv.next, 0
150  br i1 %cmp, label %exit, label %loop
151
152exit:
153  ret void
154}
155
156define void @remove_loop_region_with_replicate_recipe(ptr %dst, i64 range(i64 5, 10) %N) {
157; VF8UF1-LABEL: define void @remove_loop_region_with_replicate_recipe(
158; VF8UF1-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
159; VF8UF1-NEXT:  [[ENTRY:.*]]:
160; VF8UF1-NEXT:    [[TMP0:%.*]] = add nsw i64 [[N]], -2
161; VF8UF1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
162; VF8UF1:       [[VECTOR_PH]]:
163; VF8UF1-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP0]], 7
164; VF8UF1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
165; VF8UF1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
166; VF8UF1-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
167; VF8UF1-NEXT:    [[TMP1:%.*]] = add i64 2, [[N_VEC]]
168; VF8UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
169; VF8UF1:       [[VECTOR_BODY]]:
170; VF8UF1-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
171; VF8UF1-NEXT:    [[BROADCAST_SPLAT1:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
172; VF8UF1-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT1]]
173; VF8UF1-NEXT:    [[TMP3:%.*]] = extractelement <8 x i1> [[TMP2]], i32 0
174; VF8UF1-NEXT:    br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
175; VF8UF1:       [[PRED_STORE_IF]]:
176; VF8UF1-NEXT:    [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 2
177; VF8UF1-NEXT:    store i16 0, ptr [[TMP4]], align 2
178; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE]]
179; VF8UF1:       [[PRED_STORE_CONTINUE]]:
180; VF8UF1-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP2]], i32 1
181; VF8UF1-NEXT:    br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
182; VF8UF1:       [[PRED_STORE_IF1]]:
183; VF8UF1-NEXT:    [[TMP6:%.*]] = getelementptr i16, ptr [[DST]], i64 3
184; VF8UF1-NEXT:    store i16 0, ptr [[TMP6]], align 2
185; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
186; VF8UF1:       [[PRED_STORE_CONTINUE2]]:
187; VF8UF1-NEXT:    [[TMP7:%.*]] = extractelement <8 x i1> [[TMP2]], i32 2
188; VF8UF1-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
189; VF8UF1:       [[PRED_STORE_IF3]]:
190; VF8UF1-NEXT:    [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 4
191; VF8UF1-NEXT:    store i16 0, ptr [[TMP8]], align 2
192; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
193; VF8UF1:       [[PRED_STORE_CONTINUE4]]:
194; VF8UF1-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP2]], i32 3
195; VF8UF1-NEXT:    br i1 [[TMP9]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
196; VF8UF1:       [[PRED_STORE_IF5]]:
197; VF8UF1-NEXT:    [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 5
198; VF8UF1-NEXT:    store i16 0, ptr [[TMP10]], align 2
199; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
200; VF8UF1:       [[PRED_STORE_CONTINUE6]]:
201; VF8UF1-NEXT:    [[TMP11:%.*]] = extractelement <8 x i1> [[TMP2]], i32 4
202; VF8UF1-NEXT:    br i1 [[TMP11]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
203; VF8UF1:       [[PRED_STORE_IF7]]:
204; VF8UF1-NEXT:    [[TMP12:%.*]] = getelementptr i16, ptr [[DST]], i64 6
205; VF8UF1-NEXT:    store i16 0, ptr [[TMP12]], align 2
206; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
207; VF8UF1:       [[PRED_STORE_CONTINUE8]]:
208; VF8UF1-NEXT:    [[TMP13:%.*]] = extractelement <8 x i1> [[TMP2]], i32 5
209; VF8UF1-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
210; VF8UF1:       [[PRED_STORE_IF9]]:
211; VF8UF1-NEXT:    [[TMP14:%.*]] = getelementptr i16, ptr [[DST]], i64 7
212; VF8UF1-NEXT:    store i16 0, ptr [[TMP14]], align 2
213; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
214; VF8UF1:       [[PRED_STORE_CONTINUE10]]:
215; VF8UF1-NEXT:    [[TMP15:%.*]] = extractelement <8 x i1> [[TMP2]], i32 6
216; VF8UF1-NEXT:    br i1 [[TMP15]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
217; VF8UF1:       [[PRED_STORE_IF11]]:
218; VF8UF1-NEXT:    [[TMP16:%.*]] = getelementptr i16, ptr [[DST]], i64 8
219; VF8UF1-NEXT:    store i16 0, ptr [[TMP16]], align 2
220; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
221; VF8UF1:       [[PRED_STORE_CONTINUE12]]:
222; VF8UF1-NEXT:    [[TMP17:%.*]] = extractelement <8 x i1> [[TMP2]], i32 7
223; VF8UF1-NEXT:    br i1 [[TMP17]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
224; VF8UF1:       [[PRED_STORE_IF13]]:
225; VF8UF1-NEXT:    [[TMP18:%.*]] = getelementptr i16, ptr [[DST]], i64 9
226; VF8UF1-NEXT:    store i16 0, ptr [[TMP18]], align 2
227; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
228; VF8UF1:       [[PRED_STORE_CONTINUE14]]:
229; VF8UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
230; VF8UF1:       [[MIDDLE_BLOCK]]:
231; VF8UF1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
232; VF8UF1:       [[SCALAR_PH]]:
233; VF8UF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
234; VF8UF1-NEXT:    br label %[[LOOP:.*]]
235; VF8UF1:       [[LOOP]]:
236; VF8UF1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
237; VF8UF1-NEXT:    [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]]
238; VF8UF1-NEXT:    store i16 0, ptr [[GEP_DST]], align 2
239; VF8UF1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
240; VF8UF1-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
241; VF8UF1-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
242; VF8UF1:       [[EXIT]]:
243; VF8UF1-NEXT:    ret void
244;
245; VF8UF2-LABEL: define void @remove_loop_region_with_replicate_recipe(
246; VF8UF2-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
247; VF8UF2-NEXT:  [[ENTRY:.*]]:
248; VF8UF2-NEXT:    [[TMP0:%.*]] = add nsw i64 [[N]], -2
249; VF8UF2-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
250; VF8UF2:       [[VECTOR_PH]]:
251; VF8UF2-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP0]], 15
252; VF8UF2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
253; VF8UF2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
254; VF8UF2-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
255; VF8UF2-NEXT:    [[TMP1:%.*]] = add i64 2, [[N_VEC]]
256; VF8UF2-NEXT:    br label %[[VECTOR_BODY:.*]]
257; VF8UF2:       [[VECTOR_BODY]]:
258; VF8UF2-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
259; VF8UF2-NEXT:    [[BROADCAST_SPLAT1:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> poison, <8 x i32> zeroinitializer
260; VF8UF2-NEXT:    [[TMP2:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT1]]
261; VF8UF2-NEXT:    [[TMP3:%.*]] = icmp ule <8 x i64> <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT1]]
262; VF8UF2-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP2]], i32 0
263; VF8UF2-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
264; VF8UF2:       [[PRED_STORE_IF]]:
265; VF8UF2-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[DST]], i64 2
266; VF8UF2-NEXT:    store i16 0, ptr [[TMP5]], align 2
267; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
268; VF8UF2:       [[PRED_STORE_CONTINUE]]:
269; VF8UF2-NEXT:    [[TMP6:%.*]] = extractelement <8 x i1> [[TMP2]], i32 1
270; VF8UF2-NEXT:    br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
271; VF8UF2:       [[PRED_STORE_IF1]]:
272; VF8UF2-NEXT:    [[TMP7:%.*]] = getelementptr i16, ptr [[DST]], i64 3
273; VF8UF2-NEXT:    store i16 0, ptr [[TMP7]], align 2
274; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
275; VF8UF2:       [[PRED_STORE_CONTINUE2]]:
276; VF8UF2-NEXT:    [[TMP8:%.*]] = extractelement <8 x i1> [[TMP2]], i32 2
277; VF8UF2-NEXT:    br i1 [[TMP8]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
278; VF8UF2:       [[PRED_STORE_IF3]]:
279; VF8UF2-NEXT:    [[TMP9:%.*]] = getelementptr i16, ptr [[DST]], i64 4
280; VF8UF2-NEXT:    store i16 0, ptr [[TMP9]], align 2
281; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
282; VF8UF2:       [[PRED_STORE_CONTINUE4]]:
283; VF8UF2-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP2]], i32 3
284; VF8UF2-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
285; VF8UF2:       [[PRED_STORE_IF5]]:
286; VF8UF2-NEXT:    [[TMP11:%.*]] = getelementptr i16, ptr [[DST]], i64 5
287; VF8UF2-NEXT:    store i16 0, ptr [[TMP11]], align 2
288; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
289; VF8UF2:       [[PRED_STORE_CONTINUE6]]:
290; VF8UF2-NEXT:    [[TMP12:%.*]] = extractelement <8 x i1> [[TMP2]], i32 4
291; VF8UF2-NEXT:    br i1 [[TMP12]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
292; VF8UF2:       [[PRED_STORE_IF7]]:
293; VF8UF2-NEXT:    [[TMP13:%.*]] = getelementptr i16, ptr [[DST]], i64 6
294; VF8UF2-NEXT:    store i16 0, ptr [[TMP13]], align 2
295; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
296; VF8UF2:       [[PRED_STORE_CONTINUE8]]:
297; VF8UF2-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP2]], i32 5
298; VF8UF2-NEXT:    br i1 [[TMP14]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
299; VF8UF2:       [[PRED_STORE_IF9]]:
300; VF8UF2-NEXT:    [[TMP15:%.*]] = getelementptr i16, ptr [[DST]], i64 7
301; VF8UF2-NEXT:    store i16 0, ptr [[TMP15]], align 2
302; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
303; VF8UF2:       [[PRED_STORE_CONTINUE10]]:
304; VF8UF2-NEXT:    [[TMP16:%.*]] = extractelement <8 x i1> [[TMP2]], i32 6
305; VF8UF2-NEXT:    br i1 [[TMP16]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
306; VF8UF2:       [[PRED_STORE_IF11]]:
307; VF8UF2-NEXT:    [[TMP17:%.*]] = getelementptr i16, ptr [[DST]], i64 8
308; VF8UF2-NEXT:    store i16 0, ptr [[TMP17]], align 2
309; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
310; VF8UF2:       [[PRED_STORE_CONTINUE12]]:
311; VF8UF2-NEXT:    [[TMP18:%.*]] = extractelement <8 x i1> [[TMP2]], i32 7
312; VF8UF2-NEXT:    br i1 [[TMP18]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
313; VF8UF2:       [[PRED_STORE_IF13]]:
314; VF8UF2-NEXT:    [[TMP19:%.*]] = getelementptr i16, ptr [[DST]], i64 9
315; VF8UF2-NEXT:    store i16 0, ptr [[TMP19]], align 2
316; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
317; VF8UF2:       [[PRED_STORE_CONTINUE14]]:
318; VF8UF2-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
319; VF8UF2-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
320; VF8UF2:       [[PRED_STORE_IF15]]:
321; VF8UF2-NEXT:    [[TMP21:%.*]] = getelementptr i16, ptr [[DST]], i64 10
322; VF8UF2-NEXT:    store i16 0, ptr [[TMP21]], align 2
323; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
324; VF8UF2:       [[PRED_STORE_CONTINUE16]]:
325; VF8UF2-NEXT:    [[TMP22:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
326; VF8UF2-NEXT:    br i1 [[TMP22]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
327; VF8UF2:       [[PRED_STORE_IF17]]:
328; VF8UF2-NEXT:    [[TMP23:%.*]] = getelementptr i16, ptr [[DST]], i64 11
329; VF8UF2-NEXT:    store i16 0, ptr [[TMP23]], align 2
330; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
331; VF8UF2:       [[PRED_STORE_CONTINUE18]]:
332; VF8UF2-NEXT:    [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
333; VF8UF2-NEXT:    br i1 [[TMP24]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
334; VF8UF2:       [[PRED_STORE_IF19]]:
335; VF8UF2-NEXT:    [[TMP25:%.*]] = getelementptr i16, ptr [[DST]], i64 12
336; VF8UF2-NEXT:    store i16 0, ptr [[TMP25]], align 2
337; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
338; VF8UF2:       [[PRED_STORE_CONTINUE20]]:
339; VF8UF2-NEXT:    [[TMP26:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
340; VF8UF2-NEXT:    br i1 [[TMP26]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
341; VF8UF2:       [[PRED_STORE_IF21]]:
342; VF8UF2-NEXT:    [[TMP27:%.*]] = getelementptr i16, ptr [[DST]], i64 13
343; VF8UF2-NEXT:    store i16 0, ptr [[TMP27]], align 2
344; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
345; VF8UF2:       [[PRED_STORE_CONTINUE22]]:
346; VF8UF2-NEXT:    [[TMP28:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
347; VF8UF2-NEXT:    br i1 [[TMP28]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
348; VF8UF2:       [[PRED_STORE_IF23]]:
349; VF8UF2-NEXT:    [[TMP29:%.*]] = getelementptr i16, ptr [[DST]], i64 14
350; VF8UF2-NEXT:    store i16 0, ptr [[TMP29]], align 2
351; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
352; VF8UF2:       [[PRED_STORE_CONTINUE24]]:
353; VF8UF2-NEXT:    [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
354; VF8UF2-NEXT:    br i1 [[TMP30]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
355; VF8UF2:       [[PRED_STORE_IF25]]:
356; VF8UF2-NEXT:    [[TMP31:%.*]] = getelementptr i16, ptr [[DST]], i64 15
357; VF8UF2-NEXT:    store i16 0, ptr [[TMP31]], align 2
358; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
359; VF8UF2:       [[PRED_STORE_CONTINUE26]]:
360; VF8UF2-NEXT:    [[TMP32:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
361; VF8UF2-NEXT:    br i1 [[TMP32]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
362; VF8UF2:       [[PRED_STORE_IF27]]:
363; VF8UF2-NEXT:    [[TMP33:%.*]] = getelementptr i16, ptr [[DST]], i64 16
364; VF8UF2-NEXT:    store i16 0, ptr [[TMP33]], align 2
365; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
366; VF8UF2:       [[PRED_STORE_CONTINUE28]]:
367; VF8UF2-NEXT:    [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
368; VF8UF2-NEXT:    br i1 [[TMP34]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
369; VF8UF2:       [[PRED_STORE_IF29]]:
370; VF8UF2-NEXT:    [[TMP35:%.*]] = getelementptr i16, ptr [[DST]], i64 17
371; VF8UF2-NEXT:    store i16 0, ptr [[TMP35]], align 2
372; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
373; VF8UF2:       [[PRED_STORE_CONTINUE30]]:
374; VF8UF2-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
375; VF8UF2:       [[MIDDLE_BLOCK]]:
376; VF8UF2-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
377; VF8UF2:       [[SCALAR_PH]]:
378; VF8UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
379; VF8UF2-NEXT:    br label %[[LOOP:.*]]
380; VF8UF2:       [[LOOP]]:
381; VF8UF2-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
382; VF8UF2-NEXT:    [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]]
383; VF8UF2-NEXT:    store i16 0, ptr [[GEP_DST]], align 2
384; VF8UF2-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
385; VF8UF2-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
386; VF8UF2-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
387; VF8UF2:       [[EXIT]]:
388; VF8UF2-NEXT:    ret void
389;
390; VF16UF1-LABEL: define void @remove_loop_region_with_replicate_recipe(
391; VF16UF1-SAME: ptr [[DST:%.*]], i64 range(i64 5, 10) [[N:%.*]]) {
392; VF16UF1-NEXT:  [[ENTRY:.*]]:
393; VF16UF1-NEXT:    [[TMP0:%.*]] = add nsw i64 [[N]], -2
394; VF16UF1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
395; VF16UF1:       [[VECTOR_PH]]:
396; VF16UF1-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP0]], 15
397; VF16UF1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
398; VF16UF1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
399; VF16UF1-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
400; VF16UF1-NEXT:    [[TMP1:%.*]] = add i64 2, [[N_VEC]]
401; VF16UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
402; VF16UF1:       [[VECTOR_BODY]]:
403; VF16UF1-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
404; VF16UF1-NEXT:    [[BROADCAST_SPLAT1:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
405; VF16UF1-NEXT:    [[TMP2:%.*]] = icmp ule <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT1]]
406; VF16UF1-NEXT:    [[TMP3:%.*]] = extractelement <16 x i1> [[TMP2]], i32 0
407; VF16UF1-NEXT:    br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
408; VF16UF1:       [[PRED_STORE_IF]]:
409; VF16UF1-NEXT:    [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 2
410; VF16UF1-NEXT:    store i16 0, ptr [[TMP4]], align 2
411; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE]]
412; VF16UF1:       [[PRED_STORE_CONTINUE]]:
413; VF16UF1-NEXT:    [[TMP5:%.*]] = extractelement <16 x i1> [[TMP2]], i32 1
414; VF16UF1-NEXT:    br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
415; VF16UF1:       [[PRED_STORE_IF1]]:
416; VF16UF1-NEXT:    [[TMP6:%.*]] = getelementptr i16, ptr [[DST]], i64 3
417; VF16UF1-NEXT:    store i16 0, ptr [[TMP6]], align 2
418; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
419; VF16UF1:       [[PRED_STORE_CONTINUE2]]:
420; VF16UF1-NEXT:    [[TMP7:%.*]] = extractelement <16 x i1> [[TMP2]], i32 2
421; VF16UF1-NEXT:    br i1 [[TMP7]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
422; VF16UF1:       [[PRED_STORE_IF3]]:
423; VF16UF1-NEXT:    [[TMP8:%.*]] = getelementptr i16, ptr [[DST]], i64 4
424; VF16UF1-NEXT:    store i16 0, ptr [[TMP8]], align 2
425; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
426; VF16UF1:       [[PRED_STORE_CONTINUE4]]:
427; VF16UF1-NEXT:    [[TMP9:%.*]] = extractelement <16 x i1> [[TMP2]], i32 3
428; VF16UF1-NEXT:    br i1 [[TMP9]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
429; VF16UF1:       [[PRED_STORE_IF5]]:
430; VF16UF1-NEXT:    [[TMP10:%.*]] = getelementptr i16, ptr [[DST]], i64 5
431; VF16UF1-NEXT:    store i16 0, ptr [[TMP10]], align 2
432; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
433; VF16UF1:       [[PRED_STORE_CONTINUE6]]:
434; VF16UF1-NEXT:    [[TMP11:%.*]] = extractelement <16 x i1> [[TMP2]], i32 4
435; VF16UF1-NEXT:    br i1 [[TMP11]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
436; VF16UF1:       [[PRED_STORE_IF7]]:
437; VF16UF1-NEXT:    [[TMP12:%.*]] = getelementptr i16, ptr [[DST]], i64 6
438; VF16UF1-NEXT:    store i16 0, ptr [[TMP12]], align 2
439; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
440; VF16UF1:       [[PRED_STORE_CONTINUE8]]:
441; VF16UF1-NEXT:    [[TMP13:%.*]] = extractelement <16 x i1> [[TMP2]], i32 5
442; VF16UF1-NEXT:    br i1 [[TMP13]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
443; VF16UF1:       [[PRED_STORE_IF9]]:
444; VF16UF1-NEXT:    [[TMP14:%.*]] = getelementptr i16, ptr [[DST]], i64 7
445; VF16UF1-NEXT:    store i16 0, ptr [[TMP14]], align 2
446; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
447; VF16UF1:       [[PRED_STORE_CONTINUE10]]:
448; VF16UF1-NEXT:    [[TMP15:%.*]] = extractelement <16 x i1> [[TMP2]], i32 6
449; VF16UF1-NEXT:    br i1 [[TMP15]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
450; VF16UF1:       [[PRED_STORE_IF11]]:
451; VF16UF1-NEXT:    [[TMP16:%.*]] = getelementptr i16, ptr [[DST]], i64 8
452; VF16UF1-NEXT:    store i16 0, ptr [[TMP16]], align 2
453; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
454; VF16UF1:       [[PRED_STORE_CONTINUE12]]:
455; VF16UF1-NEXT:    [[TMP17:%.*]] = extractelement <16 x i1> [[TMP2]], i32 7
456; VF16UF1-NEXT:    br i1 [[TMP17]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
457; VF16UF1:       [[PRED_STORE_IF13]]:
458; VF16UF1-NEXT:    [[TMP18:%.*]] = getelementptr i16, ptr [[DST]], i64 9
459; VF16UF1-NEXT:    store i16 0, ptr [[TMP18]], align 2
460; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
461; VF16UF1:       [[PRED_STORE_CONTINUE14]]:
462; VF16UF1-NEXT:    [[TMP19:%.*]] = extractelement <16 x i1> [[TMP2]], i32 8
463; VF16UF1-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
464; VF16UF1:       [[PRED_STORE_IF15]]:
465; VF16UF1-NEXT:    [[TMP20:%.*]] = getelementptr i16, ptr [[DST]], i64 10
466; VF16UF1-NEXT:    store i16 0, ptr [[TMP20]], align 2
467; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
468; VF16UF1:       [[PRED_STORE_CONTINUE16]]:
469; VF16UF1-NEXT:    [[TMP21:%.*]] = extractelement <16 x i1> [[TMP2]], i32 9
470; VF16UF1-NEXT:    br i1 [[TMP21]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
471; VF16UF1:       [[PRED_STORE_IF17]]:
472; VF16UF1-NEXT:    [[TMP22:%.*]] = getelementptr i16, ptr [[DST]], i64 11
473; VF16UF1-NEXT:    store i16 0, ptr [[TMP22]], align 2
474; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
475; VF16UF1:       [[PRED_STORE_CONTINUE18]]:
476; VF16UF1-NEXT:    [[TMP23:%.*]] = extractelement <16 x i1> [[TMP2]], i32 10
477; VF16UF1-NEXT:    br i1 [[TMP23]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
478; VF16UF1:       [[PRED_STORE_IF19]]:
479; VF16UF1-NEXT:    [[TMP24:%.*]] = getelementptr i16, ptr [[DST]], i64 12
480; VF16UF1-NEXT:    store i16 0, ptr [[TMP24]], align 2
481; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
482; VF16UF1:       [[PRED_STORE_CONTINUE20]]:
483; VF16UF1-NEXT:    [[TMP25:%.*]] = extractelement <16 x i1> [[TMP2]], i32 11
484; VF16UF1-NEXT:    br i1 [[TMP25]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
485; VF16UF1:       [[PRED_STORE_IF21]]:
486; VF16UF1-NEXT:    [[TMP26:%.*]] = getelementptr i16, ptr [[DST]], i64 13
487; VF16UF1-NEXT:    store i16 0, ptr [[TMP26]], align 2
488; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
489; VF16UF1:       [[PRED_STORE_CONTINUE22]]:
490; VF16UF1-NEXT:    [[TMP27:%.*]] = extractelement <16 x i1> [[TMP2]], i32 12
491; VF16UF1-NEXT:    br i1 [[TMP27]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
492; VF16UF1:       [[PRED_STORE_IF23]]:
493; VF16UF1-NEXT:    [[TMP28:%.*]] = getelementptr i16, ptr [[DST]], i64 14
494; VF16UF1-NEXT:    store i16 0, ptr [[TMP28]], align 2
495; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
496; VF16UF1:       [[PRED_STORE_CONTINUE24]]:
497; VF16UF1-NEXT:    [[TMP29:%.*]] = extractelement <16 x i1> [[TMP2]], i32 13
498; VF16UF1-NEXT:    br i1 [[TMP29]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
499; VF16UF1:       [[PRED_STORE_IF25]]:
500; VF16UF1-NEXT:    [[TMP30:%.*]] = getelementptr i16, ptr [[DST]], i64 15
501; VF16UF1-NEXT:    store i16 0, ptr [[TMP30]], align 2
502; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
503; VF16UF1:       [[PRED_STORE_CONTINUE26]]:
504; VF16UF1-NEXT:    [[TMP31:%.*]] = extractelement <16 x i1> [[TMP2]], i32 14
505; VF16UF1-NEXT:    br i1 [[TMP31]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
506; VF16UF1:       [[PRED_STORE_IF27]]:
507; VF16UF1-NEXT:    [[TMP32:%.*]] = getelementptr i16, ptr [[DST]], i64 16
508; VF16UF1-NEXT:    store i16 0, ptr [[TMP32]], align 2
509; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
510; VF16UF1:       [[PRED_STORE_CONTINUE28]]:
511; VF16UF1-NEXT:    [[TMP33:%.*]] = extractelement <16 x i1> [[TMP2]], i32 15
512; VF16UF1-NEXT:    br i1 [[TMP33]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
513; VF16UF1:       [[PRED_STORE_IF29]]:
514; VF16UF1-NEXT:    [[TMP34:%.*]] = getelementptr i16, ptr [[DST]], i64 17
515; VF16UF1-NEXT:    store i16 0, ptr [[TMP34]], align 2
516; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
517; VF16UF1:       [[PRED_STORE_CONTINUE30]]:
518; VF16UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
519; VF16UF1:       [[MIDDLE_BLOCK]]:
520; VF16UF1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
521; VF16UF1:       [[SCALAR_PH]]:
522; VF16UF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ]
523; VF16UF1-NEXT:    br label %[[LOOP:.*]]
524; VF16UF1:       [[LOOP]]:
525; VF16UF1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
526; VF16UF1-NEXT:    [[GEP_DST:%.*]] = getelementptr i16, ptr [[DST]], i64 [[IV]]
527; VF16UF1-NEXT:    store i16 0, ptr [[GEP_DST]], align 2
528; VF16UF1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
529; VF16UF1-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
530; VF16UF1-NEXT:    br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
531; VF16UF1:       [[EXIT]]:
532; VF16UF1-NEXT:    ret void
533;
534entry:
535  br label %loop
536
537loop:
538  %iv = phi i64 [ 2, %entry ], [ %iv.next, %loop ]
539  %gep.dst = getelementptr i16, ptr %dst, i64 %iv
540  store i16 0, ptr %gep.dst, align 2
541  %iv.next = add i64 %iv, 1
542  %ec = icmp eq i64 %iv.next, %N
543  br i1 %ec, label %exit, label %loop
544
545exit:
546  ret void
547}
548
; Declared without a body; its i1 result is used as the outer-loop latch
; condition in @remove_loop_region_outer_loop.
549declare i1 @cond()
550
; Nested-loop test. The range attribute on %N limits it to [8, 16], and the
; inner loop runs %N iterations, copying one byte per iteration from %outer.iv
; to %dst. The outer loop advances %outer.iv by one byte and repeats for as
; long as @cond() returns true; %dst is not advanced between outer iterations.
; Per the checks below, the VF8UF2 and VF16UF1 runs expect a vector body that
; branches straight to the middle block (no induction phi, no backedge), while
; the VF8UF1 run still expects a vector loop with an INDEX phi and a backedge.
551define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias %src, ptr %dst) {
552; VF8UF1-LABEL: define void @remove_loop_region_outer_loop(
553; VF8UF1-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
554; VF8UF1-NEXT:  [[ENTRY:.*]]:
555; VF8UF1-NEXT:    br label %[[OUTER_HEADER:.*]]
556; VF8UF1:       [[OUTER_HEADER]]:
557; VF8UF1-NEXT:    [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
558; VF8UF1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
559; VF8UF1:       [[VECTOR_PH]]:
560; VF8UF1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
561; VF8UF1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
562; VF8UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
563; VF8UF1:       [[VECTOR_BODY]]:
564; VF8UF1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
565; VF8UF1-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
566; VF8UF1-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[TMP0]]
567; VF8UF1-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0
568; VF8UF1-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
569; VF8UF1-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
570; VF8UF1-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0
571; VF8UF1-NEXT:    store <8 x i8> [[WIDE_LOAD]], ptr [[TMP4]], align 1
572; VF8UF1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
573; VF8UF1-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
574; VF8UF1-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
575; VF8UF1:       [[MIDDLE_BLOCK]]:
576; VF8UF1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
577; VF8UF1-NEXT:    br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
578; VF8UF1:       [[SCALAR_PH]]:
579; VF8UF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
580; VF8UF1-NEXT:    br label %[[INNER:.*]]
581; VF8UF1:       [[INNER]]:
582; VF8UF1-NEXT:    [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
583; VF8UF1-NEXT:    [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]]
584; VF8UF1-NEXT:    [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
585; VF8UF1-NEXT:    [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
586; VF8UF1-NEXT:    store i8 [[L]], ptr [[GEP_DST]], align 1
587; VF8UF1-NEXT:    [[IV_NEXT]] = add i64 [[INNER_IV]], 1
588; VF8UF1-NEXT:    [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
589; VF8UF1-NEXT:    br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP6:![0-9]+]]
590; VF8UF1:       [[OUTER_LATCH]]:
591; VF8UF1-NEXT:    [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1
592; VF8UF1-NEXT:    [[C_2:%.*]] = call i1 @cond()
593; VF8UF1-NEXT:    br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
594; VF8UF1:       [[EXIT]]:
595; VF8UF1-NEXT:    ret void
596;
597; VF8UF2-LABEL: define void @remove_loop_region_outer_loop(
598; VF8UF2-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
599; VF8UF2-NEXT:  [[ENTRY:.*]]:
600; VF8UF2-NEXT:    br label %[[OUTER_HEADER:.*]]
601; VF8UF2:       [[OUTER_HEADER]]:
602; VF8UF2-NEXT:    [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
603; VF8UF2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
604; VF8UF2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
605; VF8UF2:       [[VECTOR_PH]]:
606; VF8UF2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
607; VF8UF2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
608; VF8UF2-NEXT:    br label %[[VECTOR_BODY:.*]]
609; VF8UF2:       [[VECTOR_BODY]]:
610; VF8UF2-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 0
611; VF8UF2-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
612; VF8UF2-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
613; VF8UF2-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
614; VF8UF2-NEXT:    [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
615; VF8UF2-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 0
616; VF8UF2-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP3]], i32 0
617; VF8UF2-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP3]], i32 8
618; VF8UF2-NEXT:    store <8 x i8> [[WIDE_LOAD]], ptr [[TMP4]], align 1
619; VF8UF2-NEXT:    store <8 x i8> [[WIDE_LOAD1]], ptr [[TMP5]], align 1
620; VF8UF2-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
621; VF8UF2:       [[MIDDLE_BLOCK]]:
622; VF8UF2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
623; VF8UF2-NEXT:    br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
624; VF8UF2:       [[SCALAR_PH]]:
625; VF8UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
626; VF8UF2-NEXT:    br label %[[INNER:.*]]
627; VF8UF2:       [[INNER]]:
628; VF8UF2-NEXT:    [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
629; VF8UF2-NEXT:    [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]]
630; VF8UF2-NEXT:    [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
631; VF8UF2-NEXT:    [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
632; VF8UF2-NEXT:    store i8 [[L]], ptr [[GEP_DST]], align 1
633; VF8UF2-NEXT:    [[IV_NEXT]] = add i64 [[INNER_IV]], 1
634; VF8UF2-NEXT:    [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
635; VF8UF2-NEXT:    br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP4:![0-9]+]]
636; VF8UF2:       [[OUTER_LATCH]]:
637; VF8UF2-NEXT:    [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1
638; VF8UF2-NEXT:    [[C_2:%.*]] = call i1 @cond()
639; VF8UF2-NEXT:    br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
640; VF8UF2:       [[EXIT]]:
641; VF8UF2-NEXT:    ret void
642;
643; VF16UF1-LABEL: define void @remove_loop_region_outer_loop(
644; VF16UF1-SAME: i64 range(i64 8, 17) [[N:%.*]], ptr noalias [[SRC:%.*]], ptr [[DST:%.*]]) {
645; VF16UF1-NEXT:  [[ENTRY:.*]]:
646; VF16UF1-NEXT:    br label %[[OUTER_HEADER:.*]]
647; VF16UF1:       [[OUTER_HEADER]]:
648; VF16UF1-NEXT:    [[OUTER_IV:%.*]] = phi ptr [ [[SRC]], %[[ENTRY]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
649; VF16UF1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16
650; VF16UF1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
651; VF16UF1:       [[VECTOR_PH]]:
652; VF16UF1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 16
653; VF16UF1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
654; VF16UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
655; VF16UF1:       [[VECTOR_BODY]]:
656; VF16UF1-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 0
657; VF16UF1-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0
658; VF16UF1-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
659; VF16UF1-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 0
660; VF16UF1-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0
661; VF16UF1-NEXT:    store <16 x i8> [[WIDE_LOAD]], ptr [[TMP3]], align 1
662; VF16UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
663; VF16UF1:       [[MIDDLE_BLOCK]]:
664; VF16UF1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
665; VF16UF1-NEXT:    br i1 [[CMP_N]], label %[[OUTER_LATCH]], label %[[SCALAR_PH]]
666; VF16UF1:       [[SCALAR_PH]]:
667; VF16UF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[OUTER_HEADER]] ]
668; VF16UF1-NEXT:    br label %[[INNER:.*]]
669; VF16UF1:       [[INNER]]:
670; VF16UF1-NEXT:    [[INNER_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[INNER]] ]
671; VF16UF1-NEXT:    [[GEP_SRC:%.*]] = getelementptr i8, ptr [[OUTER_IV]], i64 [[INNER_IV]]
672; VF16UF1-NEXT:    [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1
673; VF16UF1-NEXT:    [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INNER_IV]]
674; VF16UF1-NEXT:    store i8 [[L]], ptr [[GEP_DST]], align 1
675; VF16UF1-NEXT:    [[IV_NEXT]] = add i64 [[INNER_IV]], 1
676; VF16UF1-NEXT:    [[C_1:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
677; VF16UF1-NEXT:    br i1 [[C_1]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP4:![0-9]+]]
678; VF16UF1:       [[OUTER_LATCH]]:
679; VF16UF1-NEXT:    [[OUTER_IV_NEXT]] = getelementptr i8, ptr [[OUTER_IV]], i64 1
680; VF16UF1-NEXT:    [[C_2:%.*]] = call i1 @cond()
681; VF16UF1-NEXT:    br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
682; VF16UF1:       [[EXIT]]:
683; VF16UF1-NEXT:    ret void
684;
685entry:
686  br label %outer.header
687
; Outer loop header: %outer.iv is the current source pointer, starting at %src
; and updated from the outer latch.
688outer.header:
689  %outer.iv = phi ptr [ %src, %entry ], [ %outer.iv.next, %outer.latch ]
690  br label %inner
691
; Inner loop: copies the byte at %outer.iv + %inner.iv to %dst + %inner.iv,
; counting %inner.iv up from 0 and leaving once %iv.next equals %N.
692inner:
693  %inner.iv = phi i64 [ 0, %outer.header ], [ %iv.next, %inner ]
694  %gep.src = getelementptr i8, ptr %outer.iv, i64 %inner.iv
695  %l = load i8, ptr %gep.src, align 1
696  %gep.dst = getelementptr i8, ptr %dst, i64 %inner.iv
697  store i8 %l, ptr %gep.dst, align 1
698  %iv.next = add i64 %inner.iv, 1
699  %c.1 = icmp eq i64 %iv.next, %N
700  br i1 %c.1, label %outer.latch, label %inner
701
; Outer latch: advance the source pointer by one byte; the result of @cond()
; decides whether the outer loop runs again or the function returns.
702outer.latch:
703  %outer.iv.next = getelementptr i8, ptr %outer.iv, i64 1
704  %c.2 = call i1 @cond()
705  br i1 %c.2, label %outer.header, label %exit
706
707exit:
708  ret void
709}
710
; Declaration of the llvm.assume intrinsic, which is called in the entry block
; of @scev_expand_step.
711declare void @llvm.assume(i1)
712
713; Test case for https://github.com/llvm/llvm-project/issues/121897.
714define void @scev_expand_step(i64 %x, ptr %dst) {
715; VF8UF1-LABEL: define void @scev_expand_step(
716; VF8UF1-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
717; VF8UF1-NEXT:  [[ENTRY:.*]]:
718; VF8UF1-NEXT:    [[C:%.*]] = icmp eq i64 [[X]], 65536
719; VF8UF1-NEXT:    call void @llvm.assume(i1 [[C]])
720; VF8UF1-NEXT:    [[FR:%.*]] = freeze i64 [[X]]
721; VF8UF1-NEXT:    [[STEP:%.*]] = add i64 [[FR]], -65534
722; VF8UF1-NEXT:    [[TMP0:%.*]] = udiv i64 15, [[STEP]]
723; VF8UF1-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
724; VF8UF1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
725; VF8UF1:       [[VECTOR_PH]]:
726; VF8UF1-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP1]], 7
727; VF8UF1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 8
728; VF8UF1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
729; VF8UF1-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
730; VF8UF1-NEXT:    [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]]
731; VF8UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
732; VF8UF1:       [[VECTOR_BODY]]:
733; VF8UF1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
734; VF8UF1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
735; VF8UF1-NEXT:    [[TMP3:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT]]
736; VF8UF1-NEXT:    [[TMP4:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
737; VF8UF1-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
738; VF8UF1:       [[PRED_STORE_IF]]:
739; VF8UF1-NEXT:    [[TMP5:%.*]] = mul i64 0, [[STEP]]
740; VF8UF1-NEXT:    [[TMP6:%.*]] = add i64 0, [[TMP5]]
741; VF8UF1-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
742; VF8UF1-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
743; VF8UF1-NEXT:    store i8 0, ptr [[TMP8]], align 1
744; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE]]
745; VF8UF1:       [[PRED_STORE_CONTINUE]]:
746; VF8UF1-NEXT:    [[TMP9:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
747; VF8UF1-NEXT:    br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
748; VF8UF1:       [[PRED_STORE_IF1]]:
749; VF8UF1-NEXT:    [[TMP10:%.*]] = mul i64 1, [[STEP]]
750; VF8UF1-NEXT:    [[TMP11:%.*]] = add i64 0, [[TMP10]]
751; VF8UF1-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
752; VF8UF1-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
753; VF8UF1-NEXT:    store i8 0, ptr [[TMP13]], align 1
754; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
755; VF8UF1:       [[PRED_STORE_CONTINUE2]]:
756; VF8UF1-NEXT:    [[TMP14:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
757; VF8UF1-NEXT:    br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
758; VF8UF1:       [[PRED_STORE_IF3]]:
759; VF8UF1-NEXT:    [[TMP15:%.*]] = mul i64 2, [[STEP]]
760; VF8UF1-NEXT:    [[TMP16:%.*]] = add i64 0, [[TMP15]]
761; VF8UF1-NEXT:    [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
762; VF8UF1-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
763; VF8UF1-NEXT:    store i8 0, ptr [[TMP18]], align 1
764; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
765; VF8UF1:       [[PRED_STORE_CONTINUE4]]:
766; VF8UF1-NEXT:    [[TMP19:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
767; VF8UF1-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
768; VF8UF1:       [[PRED_STORE_IF5]]:
769; VF8UF1-NEXT:    [[TMP20:%.*]] = mul i64 3, [[STEP]]
770; VF8UF1-NEXT:    [[TMP21:%.*]] = add i64 0, [[TMP20]]
771; VF8UF1-NEXT:    [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
772; VF8UF1-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
773; VF8UF1-NEXT:    store i8 0, ptr [[TMP23]], align 1
774; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
775; VF8UF1:       [[PRED_STORE_CONTINUE6]]:
776; VF8UF1-NEXT:    [[TMP24:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
777; VF8UF1-NEXT:    br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
778; VF8UF1:       [[PRED_STORE_IF7]]:
779; VF8UF1-NEXT:    [[TMP25:%.*]] = mul i64 4, [[STEP]]
780; VF8UF1-NEXT:    [[TMP26:%.*]] = add i64 0, [[TMP25]]
781; VF8UF1-NEXT:    [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
782; VF8UF1-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
783; VF8UF1-NEXT:    store i8 0, ptr [[TMP28]], align 1
784; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
785; VF8UF1:       [[PRED_STORE_CONTINUE8]]:
786; VF8UF1-NEXT:    [[TMP29:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
787; VF8UF1-NEXT:    br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
788; VF8UF1:       [[PRED_STORE_IF9]]:
789; VF8UF1-NEXT:    [[TMP30:%.*]] = mul i64 5, [[STEP]]
790; VF8UF1-NEXT:    [[TMP31:%.*]] = add i64 0, [[TMP30]]
791; VF8UF1-NEXT:    [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
792; VF8UF1-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
793; VF8UF1-NEXT:    store i8 0, ptr [[TMP33]], align 1
794; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
795; VF8UF1:       [[PRED_STORE_CONTINUE10]]:
796; VF8UF1-NEXT:    [[TMP34:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
797; VF8UF1-NEXT:    br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
798; VF8UF1:       [[PRED_STORE_IF11]]:
799; VF8UF1-NEXT:    [[TMP35:%.*]] = mul i64 6, [[STEP]]
800; VF8UF1-NEXT:    [[TMP36:%.*]] = add i64 0, [[TMP35]]
801; VF8UF1-NEXT:    [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
802; VF8UF1-NEXT:    [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
803; VF8UF1-NEXT:    store i8 0, ptr [[TMP38]], align 1
804; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
805; VF8UF1:       [[PRED_STORE_CONTINUE12]]:
806; VF8UF1-NEXT:    [[TMP39:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
807; VF8UF1-NEXT:    br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
808; VF8UF1:       [[PRED_STORE_IF13]]:
809; VF8UF1-NEXT:    [[TMP40:%.*]] = mul i64 7, [[STEP]]
810; VF8UF1-NEXT:    [[TMP41:%.*]] = add i64 0, [[TMP40]]
811; VF8UF1-NEXT:    [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
812; VF8UF1-NEXT:    [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
813; VF8UF1-NEXT:    store i8 0, ptr [[TMP43]], align 1
814; VF8UF1-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
815; VF8UF1:       [[PRED_STORE_CONTINUE14]]:
816; VF8UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
817; VF8UF1:       [[MIDDLE_BLOCK]]:
818; VF8UF1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
819; VF8UF1:       [[SCALAR_PH]]:
820; VF8UF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
821; VF8UF1-NEXT:    br label %[[LOOP:.*]]
822; VF8UF1:       [[LOOP]]:
823; VF8UF1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
824; VF8UF1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], [[STEP]]
825; VF8UF1-NEXT:    [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]]
826; VF8UF1-NEXT:    store i8 0, ptr [[GEP_DST]], align 1
827; VF8UF1-NEXT:    [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16
828; VF8UF1-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
829; VF8UF1:       [[EXIT]]:
830; VF8UF1-NEXT:    ret void
831;
832; VF8UF2-LABEL: define void @scev_expand_step(
833; VF8UF2-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
834; VF8UF2-NEXT:  [[ENTRY:.*]]:
835; VF8UF2-NEXT:    [[C:%.*]] = icmp eq i64 [[X]], 65536
836; VF8UF2-NEXT:    call void @llvm.assume(i1 [[C]])
837; VF8UF2-NEXT:    [[FR:%.*]] = freeze i64 [[X]]
838; VF8UF2-NEXT:    [[STEP:%.*]] = add i64 [[FR]], -65534
839; VF8UF2-NEXT:    [[TMP0:%.*]] = udiv i64 15, [[STEP]]
840; VF8UF2-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
841; VF8UF2-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
842; VF8UF2:       [[VECTOR_PH]]:
843; VF8UF2-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP1]], 15
844; VF8UF2-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
845; VF8UF2-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
846; VF8UF2-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
847; VF8UF2-NEXT:    [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]]
848; VF8UF2-NEXT:    br label %[[VECTOR_BODY:.*]]
849; VF8UF2:       [[VECTOR_BODY]]:
850; VF8UF2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
851; VF8UF2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer
852; VF8UF2-NEXT:    [[TMP3:%.*]] = icmp ule <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[BROADCAST_SPLAT]]
853; VF8UF2-NEXT:    [[TMP4:%.*]] = icmp ule <8 x i64> <i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT]]
854; VF8UF2-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP3]], i32 0
855; VF8UF2-NEXT:    br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
856; VF8UF2:       [[PRED_STORE_IF]]:
857; VF8UF2-NEXT:    [[TMP6:%.*]] = mul i64 0, [[STEP]]
858; VF8UF2-NEXT:    [[TMP7:%.*]] = add i64 0, [[TMP6]]
859; VF8UF2-NEXT:    [[TMP8:%.*]] = add i64 [[TMP7]], [[STEP]]
860; VF8UF2-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
861; VF8UF2-NEXT:    store i8 0, ptr [[TMP9]], align 1
862; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE]]
863; VF8UF2:       [[PRED_STORE_CONTINUE]]:
864; VF8UF2-NEXT:    [[TMP10:%.*]] = extractelement <8 x i1> [[TMP3]], i32 1
865; VF8UF2-NEXT:    br i1 [[TMP10]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
866; VF8UF2:       [[PRED_STORE_IF1]]:
867; VF8UF2-NEXT:    [[TMP11:%.*]] = mul i64 1, [[STEP]]
868; VF8UF2-NEXT:    [[TMP12:%.*]] = add i64 0, [[TMP11]]
869; VF8UF2-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], [[STEP]]
870; VF8UF2-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP13]]
871; VF8UF2-NEXT:    store i8 0, ptr [[TMP14]], align 1
872; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
873; VF8UF2:       [[PRED_STORE_CONTINUE2]]:
874; VF8UF2-NEXT:    [[TMP15:%.*]] = extractelement <8 x i1> [[TMP3]], i32 2
875; VF8UF2-NEXT:    br i1 [[TMP15]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
876; VF8UF2:       [[PRED_STORE_IF3]]:
877; VF8UF2-NEXT:    [[TMP16:%.*]] = mul i64 2, [[STEP]]
878; VF8UF2-NEXT:    [[TMP17:%.*]] = add i64 0, [[TMP16]]
879; VF8UF2-NEXT:    [[TMP18:%.*]] = add i64 [[TMP17]], [[STEP]]
880; VF8UF2-NEXT:    [[TMP19:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP18]]
881; VF8UF2-NEXT:    store i8 0, ptr [[TMP19]], align 1
882; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
883; VF8UF2:       [[PRED_STORE_CONTINUE4]]:
884; VF8UF2-NEXT:    [[TMP20:%.*]] = extractelement <8 x i1> [[TMP3]], i32 3
885; VF8UF2-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
886; VF8UF2:       [[PRED_STORE_IF5]]:
887; VF8UF2-NEXT:    [[TMP21:%.*]] = mul i64 3, [[STEP]]
888; VF8UF2-NEXT:    [[TMP22:%.*]] = add i64 0, [[TMP21]]
889; VF8UF2-NEXT:    [[TMP23:%.*]] = add i64 [[TMP22]], [[STEP]]
890; VF8UF2-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
891; VF8UF2-NEXT:    store i8 0, ptr [[TMP24]], align 1
892; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
893; VF8UF2:       [[PRED_STORE_CONTINUE6]]:
894; VF8UF2-NEXT:    [[TMP25:%.*]] = extractelement <8 x i1> [[TMP3]], i32 4
895; VF8UF2-NEXT:    br i1 [[TMP25]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
896; VF8UF2:       [[PRED_STORE_IF7]]:
897; VF8UF2-NEXT:    [[TMP26:%.*]] = mul i64 4, [[STEP]]
898; VF8UF2-NEXT:    [[TMP27:%.*]] = add i64 0, [[TMP26]]
899; VF8UF2-NEXT:    [[TMP28:%.*]] = add i64 [[TMP27]], [[STEP]]
900; VF8UF2-NEXT:    [[TMP29:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP28]]
901; VF8UF2-NEXT:    store i8 0, ptr [[TMP29]], align 1
902; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
903; VF8UF2:       [[PRED_STORE_CONTINUE8]]:
904; VF8UF2-NEXT:    [[TMP30:%.*]] = extractelement <8 x i1> [[TMP3]], i32 5
905; VF8UF2-NEXT:    br i1 [[TMP30]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
906; VF8UF2:       [[PRED_STORE_IF9]]:
907; VF8UF2-NEXT:    [[TMP31:%.*]] = mul i64 5, [[STEP]]
908; VF8UF2-NEXT:    [[TMP32:%.*]] = add i64 0, [[TMP31]]
909; VF8UF2-NEXT:    [[TMP33:%.*]] = add i64 [[TMP32]], [[STEP]]
910; VF8UF2-NEXT:    [[TMP34:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP33]]
911; VF8UF2-NEXT:    store i8 0, ptr [[TMP34]], align 1
912; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
913; VF8UF2:       [[PRED_STORE_CONTINUE10]]:
914; VF8UF2-NEXT:    [[TMP35:%.*]] = extractelement <8 x i1> [[TMP3]], i32 6
915; VF8UF2-NEXT:    br i1 [[TMP35]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
916; VF8UF2:       [[PRED_STORE_IF11]]:
917; VF8UF2-NEXT:    [[TMP36:%.*]] = mul i64 6, [[STEP]]
918; VF8UF2-NEXT:    [[TMP37:%.*]] = add i64 0, [[TMP36]]
919; VF8UF2-NEXT:    [[TMP38:%.*]] = add i64 [[TMP37]], [[STEP]]
920; VF8UF2-NEXT:    [[TMP39:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP38]]
921; VF8UF2-NEXT:    store i8 0, ptr [[TMP39]], align 1
922; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
923; VF8UF2:       [[PRED_STORE_CONTINUE12]]:
924; VF8UF2-NEXT:    [[TMP40:%.*]] = extractelement <8 x i1> [[TMP3]], i32 7
925; VF8UF2-NEXT:    br i1 [[TMP40]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
926; VF8UF2:       [[PRED_STORE_IF13]]:
927; VF8UF2-NEXT:    [[TMP41:%.*]] = mul i64 7, [[STEP]]
928; VF8UF2-NEXT:    [[TMP42:%.*]] = add i64 0, [[TMP41]]
929; VF8UF2-NEXT:    [[TMP43:%.*]] = add i64 [[TMP42]], [[STEP]]
930; VF8UF2-NEXT:    [[TMP44:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP43]]
931; VF8UF2-NEXT:    store i8 0, ptr [[TMP44]], align 1
932; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
933; VF8UF2:       [[PRED_STORE_CONTINUE14]]:
934; VF8UF2-NEXT:    [[TMP45:%.*]] = extractelement <8 x i1> [[TMP4]], i32 0
935; VF8UF2-NEXT:    br i1 [[TMP45]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
936; VF8UF2:       [[PRED_STORE_IF15]]:
937; VF8UF2-NEXT:    [[TMP46:%.*]] = mul i64 8, [[STEP]]
938; VF8UF2-NEXT:    [[TMP47:%.*]] = add i64 0, [[TMP46]]
939; VF8UF2-NEXT:    [[TMP48:%.*]] = add i64 [[TMP47]], [[STEP]]
940; VF8UF2-NEXT:    [[TMP49:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP48]]
941; VF8UF2-NEXT:    store i8 0, ptr [[TMP49]], align 1
942; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
943; VF8UF2:       [[PRED_STORE_CONTINUE16]]:
944; VF8UF2-NEXT:    [[TMP50:%.*]] = extractelement <8 x i1> [[TMP4]], i32 1
945; VF8UF2-NEXT:    br i1 [[TMP50]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
946; VF8UF2:       [[PRED_STORE_IF17]]:
947; VF8UF2-NEXT:    [[TMP51:%.*]] = mul i64 9, [[STEP]]
948; VF8UF2-NEXT:    [[TMP52:%.*]] = add i64 0, [[TMP51]]
949; VF8UF2-NEXT:    [[TMP53:%.*]] = add i64 [[TMP52]], [[STEP]]
950; VF8UF2-NEXT:    [[TMP54:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP53]]
951; VF8UF2-NEXT:    store i8 0, ptr [[TMP54]], align 1
952; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
953; VF8UF2:       [[PRED_STORE_CONTINUE18]]:
954; VF8UF2-NEXT:    [[TMP55:%.*]] = extractelement <8 x i1> [[TMP4]], i32 2
955; VF8UF2-NEXT:    br i1 [[TMP55]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
956; VF8UF2:       [[PRED_STORE_IF19]]:
957; VF8UF2-NEXT:    [[TMP56:%.*]] = mul i64 10, [[STEP]]
958; VF8UF2-NEXT:    [[TMP57:%.*]] = add i64 0, [[TMP56]]
959; VF8UF2-NEXT:    [[TMP58:%.*]] = add i64 [[TMP57]], [[STEP]]
960; VF8UF2-NEXT:    [[TMP59:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP58]]
961; VF8UF2-NEXT:    store i8 0, ptr [[TMP59]], align 1
962; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
963; VF8UF2:       [[PRED_STORE_CONTINUE20]]:
964; VF8UF2-NEXT:    [[TMP60:%.*]] = extractelement <8 x i1> [[TMP4]], i32 3
965; VF8UF2-NEXT:    br i1 [[TMP60]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
966; VF8UF2:       [[PRED_STORE_IF21]]:
967; VF8UF2-NEXT:    [[TMP61:%.*]] = mul i64 11, [[STEP]]
968; VF8UF2-NEXT:    [[TMP62:%.*]] = add i64 0, [[TMP61]]
969; VF8UF2-NEXT:    [[TMP63:%.*]] = add i64 [[TMP62]], [[STEP]]
970; VF8UF2-NEXT:    [[TMP64:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP63]]
971; VF8UF2-NEXT:    store i8 0, ptr [[TMP64]], align 1
972; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
973; VF8UF2:       [[PRED_STORE_CONTINUE22]]:
974; VF8UF2-NEXT:    [[TMP65:%.*]] = extractelement <8 x i1> [[TMP4]], i32 4
975; VF8UF2-NEXT:    br i1 [[TMP65]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
976; VF8UF2:       [[PRED_STORE_IF23]]:
977; VF8UF2-NEXT:    [[TMP66:%.*]] = mul i64 12, [[STEP]]
978; VF8UF2-NEXT:    [[TMP67:%.*]] = add i64 0, [[TMP66]]
979; VF8UF2-NEXT:    [[TMP68:%.*]] = add i64 [[TMP67]], [[STEP]]
980; VF8UF2-NEXT:    [[TMP69:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP68]]
981; VF8UF2-NEXT:    store i8 0, ptr [[TMP69]], align 1
982; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
983; VF8UF2:       [[PRED_STORE_CONTINUE24]]:
984; VF8UF2-NEXT:    [[TMP70:%.*]] = extractelement <8 x i1> [[TMP4]], i32 5
985; VF8UF2-NEXT:    br i1 [[TMP70]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
986; VF8UF2:       [[PRED_STORE_IF25]]:
987; VF8UF2-NEXT:    [[TMP71:%.*]] = mul i64 13, [[STEP]]
988; VF8UF2-NEXT:    [[TMP72:%.*]] = add i64 0, [[TMP71]]
989; VF8UF2-NEXT:    [[TMP73:%.*]] = add i64 [[TMP72]], [[STEP]]
990; VF8UF2-NEXT:    [[TMP74:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP73]]
991; VF8UF2-NEXT:    store i8 0, ptr [[TMP74]], align 1
992; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
993; VF8UF2:       [[PRED_STORE_CONTINUE26]]:
994; VF8UF2-NEXT:    [[TMP75:%.*]] = extractelement <8 x i1> [[TMP4]], i32 6
995; VF8UF2-NEXT:    br i1 [[TMP75]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
996; VF8UF2:       [[PRED_STORE_IF27]]:
997; VF8UF2-NEXT:    [[TMP76:%.*]] = mul i64 14, [[STEP]]
998; VF8UF2-NEXT:    [[TMP77:%.*]] = add i64 0, [[TMP76]]
999; VF8UF2-NEXT:    [[TMP78:%.*]] = add i64 [[TMP77]], [[STEP]]
1000; VF8UF2-NEXT:    [[TMP79:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP78]]
1001; VF8UF2-NEXT:    store i8 0, ptr [[TMP79]], align 1
1002; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
1003; VF8UF2:       [[PRED_STORE_CONTINUE28]]:
1004; VF8UF2-NEXT:    [[TMP80:%.*]] = extractelement <8 x i1> [[TMP4]], i32 7
1005; VF8UF2-NEXT:    br i1 [[TMP80]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
1006; VF8UF2:       [[PRED_STORE_IF29]]:
1007; VF8UF2-NEXT:    [[TMP81:%.*]] = mul i64 15, [[STEP]]
1008; VF8UF2-NEXT:    [[TMP82:%.*]] = add i64 0, [[TMP81]]
1009; VF8UF2-NEXT:    [[TMP83:%.*]] = add i64 [[TMP82]], [[STEP]]
1010; VF8UF2-NEXT:    [[TMP84:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP83]]
1011; VF8UF2-NEXT:    store i8 0, ptr [[TMP84]], align 1
1012; VF8UF2-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
1013; VF8UF2:       [[PRED_STORE_CONTINUE30]]:
1014; VF8UF2-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
1015; VF8UF2:       [[MIDDLE_BLOCK]]:
1016; VF8UF2-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
1017; VF8UF2:       [[SCALAR_PH]]:
1018; VF8UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1019; VF8UF2-NEXT:    br label %[[LOOP:.*]]
1020; VF8UF2:       [[LOOP]]:
1021; VF8UF2-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1022; VF8UF2-NEXT:    [[IV_NEXT]] = add i64 [[IV]], [[STEP]]
1023; VF8UF2-NEXT:    [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]]
1024; VF8UF2-NEXT:    store i8 0, ptr [[GEP_DST]], align 1
1025; VF8UF2-NEXT:    [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16
1026; VF8UF2-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
1027; VF8UF2:       [[EXIT]]:
1028; VF8UF2-NEXT:    ret void
1029;
1030; VF16UF1-LABEL: define void @scev_expand_step(
1031; VF16UF1-SAME: i64 [[X:%.*]], ptr [[DST:%.*]]) {
1032; VF16UF1-NEXT:  [[ENTRY:.*]]:
1033; VF16UF1-NEXT:    [[C:%.*]] = icmp eq i64 [[X]], 65536
1034; VF16UF1-NEXT:    call void @llvm.assume(i1 [[C]])
1035; VF16UF1-NEXT:    [[FR:%.*]] = freeze i64 [[X]]
1036; VF16UF1-NEXT:    [[STEP:%.*]] = add i64 [[FR]], -65534
1037; VF16UF1-NEXT:    [[TMP0:%.*]] = udiv i64 15, [[STEP]]
1038; VF16UF1-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
1039; VF16UF1-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1040; VF16UF1:       [[VECTOR_PH]]:
1041; VF16UF1-NEXT:    [[N_RND_UP:%.*]] = add i64 [[TMP1]], 15
1042; VF16UF1-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16
1043; VF16UF1-NEXT:    [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
1044; VF16UF1-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
1045; VF16UF1-NEXT:    [[TMP2:%.*]] = mul i64 [[N_VEC]], [[STEP]]
1046; VF16UF1-NEXT:    br label %[[VECTOR_BODY:.*]]
1047; VF16UF1:       [[VECTOR_BODY]]:
1048; VF16UF1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
1049; VF16UF1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer
1050; VF16UF1-NEXT:    [[TMP3:%.*]] = icmp ule <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, [[BROADCAST_SPLAT]]
1051; VF16UF1-NEXT:    [[TMP4:%.*]] = extractelement <16 x i1> [[TMP3]], i32 0
1052; VF16UF1-NEXT:    br i1 [[TMP4]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
1053; VF16UF1:       [[PRED_STORE_IF]]:
1054; VF16UF1-NEXT:    [[TMP5:%.*]] = mul i64 0, [[STEP]]
1055; VF16UF1-NEXT:    [[TMP6:%.*]] = add i64 0, [[TMP5]]
1056; VF16UF1-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], [[STEP]]
1057; VF16UF1-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
1058; VF16UF1-NEXT:    store i8 0, ptr [[TMP8]], align 1
1059; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE]]
1060; VF16UF1:       [[PRED_STORE_CONTINUE]]:
1061; VF16UF1-NEXT:    [[TMP9:%.*]] = extractelement <16 x i1> [[TMP3]], i32 1
1062; VF16UF1-NEXT:    br i1 [[TMP9]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
1063; VF16UF1:       [[PRED_STORE_IF1]]:
1064; VF16UF1-NEXT:    [[TMP10:%.*]] = mul i64 1, [[STEP]]
1065; VF16UF1-NEXT:    [[TMP11:%.*]] = add i64 0, [[TMP10]]
1066; VF16UF1-NEXT:    [[TMP12:%.*]] = add i64 [[TMP11]], [[STEP]]
1067; VF16UF1-NEXT:    [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
1068; VF16UF1-NEXT:    store i8 0, ptr [[TMP13]], align 1
1069; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
1070; VF16UF1:       [[PRED_STORE_CONTINUE2]]:
1071; VF16UF1-NEXT:    [[TMP14:%.*]] = extractelement <16 x i1> [[TMP3]], i32 2
1072; VF16UF1-NEXT:    br i1 [[TMP14]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
1073; VF16UF1:       [[PRED_STORE_IF3]]:
1074; VF16UF1-NEXT:    [[TMP15:%.*]] = mul i64 2, [[STEP]]
1075; VF16UF1-NEXT:    [[TMP16:%.*]] = add i64 0, [[TMP15]]
1076; VF16UF1-NEXT:    [[TMP17:%.*]] = add i64 [[TMP16]], [[STEP]]
1077; VF16UF1-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
1078; VF16UF1-NEXT:    store i8 0, ptr [[TMP18]], align 1
1079; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE4]]
1080; VF16UF1:       [[PRED_STORE_CONTINUE4]]:
1081; VF16UF1-NEXT:    [[TMP19:%.*]] = extractelement <16 x i1> [[TMP3]], i32 3
1082; VF16UF1-NEXT:    br i1 [[TMP19]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
1083; VF16UF1:       [[PRED_STORE_IF5]]:
1084; VF16UF1-NEXT:    [[TMP20:%.*]] = mul i64 3, [[STEP]]
1085; VF16UF1-NEXT:    [[TMP21:%.*]] = add i64 0, [[TMP20]]
1086; VF16UF1-NEXT:    [[TMP22:%.*]] = add i64 [[TMP21]], [[STEP]]
1087; VF16UF1-NEXT:    [[TMP23:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP22]]
1088; VF16UF1-NEXT:    store i8 0, ptr [[TMP23]], align 1
1089; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE6]]
1090; VF16UF1:       [[PRED_STORE_CONTINUE6]]:
1091; VF16UF1-NEXT:    [[TMP24:%.*]] = extractelement <16 x i1> [[TMP3]], i32 4
1092; VF16UF1-NEXT:    br i1 [[TMP24]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
1093; VF16UF1:       [[PRED_STORE_IF7]]:
1094; VF16UF1-NEXT:    [[TMP25:%.*]] = mul i64 4, [[STEP]]
1095; VF16UF1-NEXT:    [[TMP26:%.*]] = add i64 0, [[TMP25]]
1096; VF16UF1-NEXT:    [[TMP27:%.*]] = add i64 [[TMP26]], [[STEP]]
1097; VF16UF1-NEXT:    [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
1098; VF16UF1-NEXT:    store i8 0, ptr [[TMP28]], align 1
1099; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE8]]
1100; VF16UF1:       [[PRED_STORE_CONTINUE8]]:
1101; VF16UF1-NEXT:    [[TMP29:%.*]] = extractelement <16 x i1> [[TMP3]], i32 5
1102; VF16UF1-NEXT:    br i1 [[TMP29]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
1103; VF16UF1:       [[PRED_STORE_IF9]]:
1104; VF16UF1-NEXT:    [[TMP30:%.*]] = mul i64 5, [[STEP]]
1105; VF16UF1-NEXT:    [[TMP31:%.*]] = add i64 0, [[TMP30]]
1106; VF16UF1-NEXT:    [[TMP32:%.*]] = add i64 [[TMP31]], [[STEP]]
1107; VF16UF1-NEXT:    [[TMP33:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP32]]
1108; VF16UF1-NEXT:    store i8 0, ptr [[TMP33]], align 1
1109; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE10]]
1110; VF16UF1:       [[PRED_STORE_CONTINUE10]]:
1111; VF16UF1-NEXT:    [[TMP34:%.*]] = extractelement <16 x i1> [[TMP3]], i32 6
1112; VF16UF1-NEXT:    br i1 [[TMP34]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
1113; VF16UF1:       [[PRED_STORE_IF11]]:
1114; VF16UF1-NEXT:    [[TMP35:%.*]] = mul i64 6, [[STEP]]
1115; VF16UF1-NEXT:    [[TMP36:%.*]] = add i64 0, [[TMP35]]
1116; VF16UF1-NEXT:    [[TMP37:%.*]] = add i64 [[TMP36]], [[STEP]]
1117; VF16UF1-NEXT:    [[TMP38:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP37]]
1118; VF16UF1-NEXT:    store i8 0, ptr [[TMP38]], align 1
1119; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE12]]
1120; VF16UF1:       [[PRED_STORE_CONTINUE12]]:
1121; VF16UF1-NEXT:    [[TMP39:%.*]] = extractelement <16 x i1> [[TMP3]], i32 7
1122; VF16UF1-NEXT:    br i1 [[TMP39]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
1123; VF16UF1:       [[PRED_STORE_IF13]]:
1124; VF16UF1-NEXT:    [[TMP40:%.*]] = mul i64 7, [[STEP]]
1125; VF16UF1-NEXT:    [[TMP41:%.*]] = add i64 0, [[TMP40]]
1126; VF16UF1-NEXT:    [[TMP42:%.*]] = add i64 [[TMP41]], [[STEP]]
1127; VF16UF1-NEXT:    [[TMP43:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP42]]
1128; VF16UF1-NEXT:    store i8 0, ptr [[TMP43]], align 1
1129; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE14]]
1130; VF16UF1:       [[PRED_STORE_CONTINUE14]]:
1131; VF16UF1-NEXT:    [[TMP44:%.*]] = extractelement <16 x i1> [[TMP3]], i32 8
1132; VF16UF1-NEXT:    br i1 [[TMP44]], label %[[PRED_STORE_IF15:.*]], label %[[PRED_STORE_CONTINUE16:.*]]
1133; VF16UF1:       [[PRED_STORE_IF15]]:
1134; VF16UF1-NEXT:    [[TMP45:%.*]] = mul i64 8, [[STEP]]
1135; VF16UF1-NEXT:    [[TMP46:%.*]] = add i64 0, [[TMP45]]
1136; VF16UF1-NEXT:    [[TMP47:%.*]] = add i64 [[TMP46]], [[STEP]]
1137; VF16UF1-NEXT:    [[TMP48:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP47]]
1138; VF16UF1-NEXT:    store i8 0, ptr [[TMP48]], align 1
1139; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE16]]
1140; VF16UF1:       [[PRED_STORE_CONTINUE16]]:
1141; VF16UF1-NEXT:    [[TMP49:%.*]] = extractelement <16 x i1> [[TMP3]], i32 9
1142; VF16UF1-NEXT:    br i1 [[TMP49]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
1143; VF16UF1:       [[PRED_STORE_IF17]]:
1144; VF16UF1-NEXT:    [[TMP50:%.*]] = mul i64 9, [[STEP]]
1145; VF16UF1-NEXT:    [[TMP51:%.*]] = add i64 0, [[TMP50]]
1146; VF16UF1-NEXT:    [[TMP52:%.*]] = add i64 [[TMP51]], [[STEP]]
1147; VF16UF1-NEXT:    [[TMP53:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP52]]
1148; VF16UF1-NEXT:    store i8 0, ptr [[TMP53]], align 1
1149; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE18]]
1150; VF16UF1:       [[PRED_STORE_CONTINUE18]]:
1151; VF16UF1-NEXT:    [[TMP54:%.*]] = extractelement <16 x i1> [[TMP3]], i32 10
1152; VF16UF1-NEXT:    br i1 [[TMP54]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
1153; VF16UF1:       [[PRED_STORE_IF19]]:
1154; VF16UF1-NEXT:    [[TMP55:%.*]] = mul i64 10, [[STEP]]
1155; VF16UF1-NEXT:    [[TMP56:%.*]] = add i64 0, [[TMP55]]
1156; VF16UF1-NEXT:    [[TMP57:%.*]] = add i64 [[TMP56]], [[STEP]]
1157; VF16UF1-NEXT:    [[TMP58:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP57]]
1158; VF16UF1-NEXT:    store i8 0, ptr [[TMP58]], align 1
1159; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE20]]
1160; VF16UF1:       [[PRED_STORE_CONTINUE20]]:
1161; VF16UF1-NEXT:    [[TMP59:%.*]] = extractelement <16 x i1> [[TMP3]], i32 11
1162; VF16UF1-NEXT:    br i1 [[TMP59]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
1163; VF16UF1:       [[PRED_STORE_IF21]]:
1164; VF16UF1-NEXT:    [[TMP60:%.*]] = mul i64 11, [[STEP]]
1165; VF16UF1-NEXT:    [[TMP61:%.*]] = add i64 0, [[TMP60]]
1166; VF16UF1-NEXT:    [[TMP62:%.*]] = add i64 [[TMP61]], [[STEP]]
1167; VF16UF1-NEXT:    [[TMP63:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP62]]
1168; VF16UF1-NEXT:    store i8 0, ptr [[TMP63]], align 1
1169; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE22]]
1170; VF16UF1:       [[PRED_STORE_CONTINUE22]]:
1171; VF16UF1-NEXT:    [[TMP64:%.*]] = extractelement <16 x i1> [[TMP3]], i32 12
1172; VF16UF1-NEXT:    br i1 [[TMP64]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
1173; VF16UF1:       [[PRED_STORE_IF23]]:
1174; VF16UF1-NEXT:    [[TMP65:%.*]] = mul i64 12, [[STEP]]
1175; VF16UF1-NEXT:    [[TMP66:%.*]] = add i64 0, [[TMP65]]
1176; VF16UF1-NEXT:    [[TMP67:%.*]] = add i64 [[TMP66]], [[STEP]]
1177; VF16UF1-NEXT:    [[TMP68:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP67]]
1178; VF16UF1-NEXT:    store i8 0, ptr [[TMP68]], align 1
1179; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE24]]
1180; VF16UF1:       [[PRED_STORE_CONTINUE24]]:
1181; VF16UF1-NEXT:    [[TMP69:%.*]] = extractelement <16 x i1> [[TMP3]], i32 13
1182; VF16UF1-NEXT:    br i1 [[TMP69]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
1183; VF16UF1:       [[PRED_STORE_IF25]]:
1184; VF16UF1-NEXT:    [[TMP70:%.*]] = mul i64 13, [[STEP]]
1185; VF16UF1-NEXT:    [[TMP71:%.*]] = add i64 0, [[TMP70]]
1186; VF16UF1-NEXT:    [[TMP72:%.*]] = add i64 [[TMP71]], [[STEP]]
1187; VF16UF1-NEXT:    [[TMP73:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP72]]
1188; VF16UF1-NEXT:    store i8 0, ptr [[TMP73]], align 1
1189; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE26]]
1190; VF16UF1:       [[PRED_STORE_CONTINUE26]]:
1191; VF16UF1-NEXT:    [[TMP74:%.*]] = extractelement <16 x i1> [[TMP3]], i32 14
1192; VF16UF1-NEXT:    br i1 [[TMP74]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
1193; VF16UF1:       [[PRED_STORE_IF27]]:
1194; VF16UF1-NEXT:    [[TMP75:%.*]] = mul i64 14, [[STEP]]
1195; VF16UF1-NEXT:    [[TMP76:%.*]] = add i64 0, [[TMP75]]
1196; VF16UF1-NEXT:    [[TMP77:%.*]] = add i64 [[TMP76]], [[STEP]]
1197; VF16UF1-NEXT:    [[TMP78:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP77]]
1198; VF16UF1-NEXT:    store i8 0, ptr [[TMP78]], align 1
1199; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE28]]
1200; VF16UF1:       [[PRED_STORE_CONTINUE28]]:
1201; VF16UF1-NEXT:    [[TMP79:%.*]] = extractelement <16 x i1> [[TMP3]], i32 15
1202; VF16UF1-NEXT:    br i1 [[TMP79]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
1203; VF16UF1:       [[PRED_STORE_IF29]]:
1204; VF16UF1-NEXT:    [[TMP80:%.*]] = mul i64 15, [[STEP]]
1205; VF16UF1-NEXT:    [[TMP81:%.*]] = add i64 0, [[TMP80]]
1206; VF16UF1-NEXT:    [[TMP82:%.*]] = add i64 [[TMP81]], [[STEP]]
1207; VF16UF1-NEXT:    [[TMP83:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP82]]
1208; VF16UF1-NEXT:    store i8 0, ptr [[TMP83]], align 1
1209; VF16UF1-NEXT:    br label %[[PRED_STORE_CONTINUE30]]
1210; VF16UF1:       [[PRED_STORE_CONTINUE30]]:
1211; VF16UF1-NEXT:    br label %[[MIDDLE_BLOCK:.*]]
1212; VF16UF1:       [[MIDDLE_BLOCK]]:
1213; VF16UF1-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
1214; VF16UF1:       [[SCALAR_PH]]:
1215; VF16UF1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1216; VF16UF1-NEXT:    br label %[[LOOP:.*]]
1217; VF16UF1:       [[LOOP]]:
1218; VF16UF1-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1219; VF16UF1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], [[STEP]]
1220; VF16UF1-NEXT:    [[GEP_DST:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV_NEXT]]
1221; VF16UF1-NEXT:    store i8 0, ptr [[GEP_DST]], align 1
1222; VF16UF1-NEXT:    [[EC:%.*]] = icmp slt i64 [[IV_NEXT]], 16
1223; VF16UF1-NEXT:    br i1 [[EC]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]]
1224; VF16UF1:       [[EXIT]]:
1225; VF16UF1-NEXT:    ret void
1226;
1227entry:
1228  %c = icmp eq i64 %x, 65536
1229  call void @llvm.assume(i1 %c)
1230  %fr = freeze i64 %x
1231  %step = add i64 %fr, -65534
1232  br label %loop
1233
1234loop:
1235  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
1236  %iv.next = add i64 %iv, %step
1237  %gep.dst = getelementptr i8, ptr %dst, i64 %iv.next
1238  store i8 0, ptr %gep.dst, align 1
1239  %ec = icmp slt i64 %iv.next, 16
1240  br i1 %ec, label %loop, label %exit
1241
1242exit:
1243  ret void
1244}
1245
1246;.
1247; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1248; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
1249; VF8UF1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
1250; VF8UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
1251; VF8UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
1252; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
1253; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
1254; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1255;.
1256; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1257; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
1258; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
1259; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
1260; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1261; VF8UF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
1262;.
1263; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1264; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
1265; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
1266; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
1267; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1268; VF16UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
1269;.
1270