xref: /llvm-project/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll (revision 82821254f532c1dbdfd5d985ef7130511efaaa83)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S | FileCheck %s
3; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s -check-prefix=VF2UF2
4; RUN: opt < %s -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -S | FileCheck %s -check-prefix=VF1UF4
5
6target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
7
8; Make sure a loop is vectorized correctly with fold-tail when the constant
9; trip-count is not a multiple of -force-vector-width and/or
10; -force-vector-interleave, but is a multiple of the internally computed MaxVF;
11; e.g., when all types are i32, leading to MaxVF=1.
12
; @pr45679: scalar input is a 14-iteration loop that stores the i32 constant 13
; to consecutive elements of %A; the function carries the optsize attribute.
;
; For all three opt invocations the expected output below has the same shape:
;   * every store sits in its own pred.store.if* block, guarded by one lane of
;     an `icmp ule <index>, 13` mask (a vector compare for the width-4 and
;     width-2 runs, four scalar compares for the width-1 run);
;   * the index advances by 4 per vector-loop iteration and the vector loop is
;     left when it equals 16;
;   * middle.block branches to the exit on a constant-true condition, while the
;     original scalar loop is still present behind scalar.ph.
;
; NOTE(review): under the VF2UF2 prefix the FileCheck capture names
; (PRED_STORE_IF2, PRED_STORE_CONTINUE3, ..., PRED_STORE_CONTINUE7) are numbered
; one higher than the literal block labels they branch to (pred.store.if1,
; pred.store.continue2, ..., pred.store.continue6); the other two prefixes are
; consistent. Presumably a stale hand edit - confirm by regenerating the
; assertions with utils/update_test_checks.py.
13define void @pr45679(ptr %A) optsize {
14; CHECK-LABEL: @pr45679(
15; CHECK-NEXT:  entry:
16; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
17; CHECK:       vector.ph:
18; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
19; CHECK:       vector.body:
20; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
21; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
22; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], splat (i32 13)
23; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
24; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
25; CHECK:       pred.store.if:
26; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
27; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP2]]
28; CHECK-NEXT:    store i32 13, ptr [[TMP3]], align 1
29; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
30; CHECK:       pred.store.continue:
31; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
32; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
33; CHECK:       pred.store.if1:
34; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
35; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]]
36; CHECK-NEXT:    store i32 13, ptr [[TMP6]], align 1
37; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
38; CHECK:       pred.store.continue2:
39; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
40; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
41; CHECK:       pred.store.if3:
42; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[INDEX]], 2
43; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP8]]
44; CHECK-NEXT:    store i32 13, ptr [[TMP9]], align 1
45; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
46; CHECK:       pred.store.continue4:
47; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
48; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
49; CHECK:       pred.store.if5:
50; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[INDEX]], 3
51; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
52; CHECK-NEXT:    store i32 13, ptr [[TMP12]], align 1
53; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
54; CHECK:       pred.store.continue6:
55; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
56; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
57; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
58; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
59; CHECK:       middle.block:
60; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
61; CHECK:       scalar.ph:
62; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
63; CHECK-NEXT:    br label [[LOOP:%.*]]
64; CHECK:       loop:
65; CHECK-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
66; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
67; CHECK-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
68; CHECK-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
69; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
70; CHECK-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
71; CHECK:       exit:
72; CHECK-NEXT:    ret void
73;
74; VF2UF2-LABEL: @pr45679(
75; VF2UF2-NEXT:  entry:
76; VF2UF2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
77; VF2UF2:       vector.ph:
78; VF2UF2-NEXT:    br label [[VECTOR_BODY:%.*]]
79; VF2UF2:       vector.body:
80; VF2UF2-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
81; VF2UF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
82; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
83; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 13)
84; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[STEP_ADD]], splat (i32 13)
85; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
86; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
87; VF2UF2:       pred.store.if:
88; VF2UF2-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
89; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]]
90; VF2UF2-NEXT:    store i32 13, ptr [[TMP4]], align 1
91; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
92; VF2UF2:       pred.store.continue:
93; VF2UF2-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
94; VF2UF2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
95; VF2UF2:       pred.store.if1:
96; VF2UF2-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
97; VF2UF2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP6]]
98; VF2UF2-NEXT:    store i32 13, ptr [[TMP7]], align 1
99; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
100; VF2UF2:       pred.store.continue2:
101; VF2UF2-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
102; VF2UF2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
103; VF2UF2:       pred.store.if3:
104; VF2UF2-NEXT:    [[TMP9:%.*]] = add i32 [[INDEX]], 2
105; VF2UF2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP9]]
106; VF2UF2-NEXT:    store i32 13, ptr [[TMP10]], align 1
107; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
108; VF2UF2:       pred.store.continue4:
109; VF2UF2-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
110; VF2UF2-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
111; VF2UF2:       pred.store.if5:
112; VF2UF2-NEXT:    [[TMP12:%.*]] = add i32 [[INDEX]], 3
113; VF2UF2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP12]]
114; VF2UF2-NEXT:    store i32 13, ptr [[TMP13]], align 1
115; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
116; VF2UF2:       pred.store.continue6:
117; VF2UF2-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
118; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
119; VF2UF2-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
120; VF2UF2-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
121; VF2UF2:       middle.block:
122; VF2UF2-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
123; VF2UF2:       scalar.ph:
124; VF2UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
125; VF2UF2-NEXT:    br label [[LOOP:%.*]]
126; VF2UF2:       loop:
127; VF2UF2-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
128; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
129; VF2UF2-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
130; VF2UF2-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
131; VF2UF2-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
132; VF2UF2-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
133; VF2UF2:       exit:
134; VF2UF2-NEXT:    ret void
135;
136; VF1UF4-LABEL: @pr45679(
137; VF1UF4-NEXT:  entry:
138; VF1UF4-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
139; VF1UF4:       vector.ph:
140; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
141; VF1UF4:       vector.body:
142; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
143; VF1UF4-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
144; VF1UF4-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
145; VF1UF4-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
146; VF1UF4-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 3
147; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13
148; VF1UF4-NEXT:    [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13
149; VF1UF4-NEXT:    [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13
150; VF1UF4-NEXT:    [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13
151; VF1UF4-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
152; VF1UF4:       pred.store.if:
153; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]]
154; VF1UF4-NEXT:    store i32 13, ptr [[TMP8]], align 1
155; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
156; VF1UF4:       pred.store.continue:
157; VF1UF4-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
158; VF1UF4:       pred.store.if1:
159; VF1UF4-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP1]]
160; VF1UF4-NEXT:    store i32 13, ptr [[TMP9]], align 1
161; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE2]]
162; VF1UF4:       pred.store.continue2:
163; VF1UF4-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
164; VF1UF4:       pred.store.if3:
165; VF1UF4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP2]]
166; VF1UF4-NEXT:    store i32 13, ptr [[TMP10]], align 1
167; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE4]]
168; VF1UF4:       pred.store.continue4:
169; VF1UF4-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
170; VF1UF4:       pred.store.if5:
171; VF1UF4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP3]]
172; VF1UF4-NEXT:    store i32 13, ptr [[TMP11]], align 1
173; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE6]]
174; VF1UF4:       pred.store.continue6:
175; VF1UF4-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
176; VF1UF4-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
177; VF1UF4-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
178; VF1UF4:       middle.block:
179; VF1UF4-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
180; VF1UF4:       scalar.ph:
181; VF1UF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
182; VF1UF4-NEXT:    br label [[LOOP:%.*]]
183; VF1UF4:       loop:
184; VF1UF4-NEXT:    [[RIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[RIVPLUS1:%.*]], [[LOOP]] ]
185; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[RIV]]
186; VF1UF4-NEXT:    store i32 13, ptr [[ARRAYIDX]], align 1
187; VF1UF4-NEXT:    [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
188; VF1UF4-NEXT:    [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14
189; VF1UF4-NEXT:    br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
190; VF1UF4:       exit:
191; VF1UF4-NEXT:    ret void
192;
; Input IR handed to opt: a single-block loop. %riv starts at 0 and is bumped
; by 1 each iteration; the loop exits as soon as %rivPlus1 equals 14, so the
; store below executes for %riv = 0 through 13.
193entry:
194  br label %loop
195
196loop:
197  %riv = phi i32 [ 0, %entry ], [ %rivPlus1, %loop ]
198  %arrayidx = getelementptr inbounds i32, ptr %A, i32 %riv
; The store is only byte-aligned (align 1) even though it writes an i32.
199  store i32 13, ptr %arrayidx, align 1
200  %rivPlus1 = add nuw nsw i32 %riv, 1
201  %cond = icmp eq i32 %rivPlus1, 14
202  br i1 %cond, label %exit, label %loop
203
204exit:
205  ret void
206}
207
; @load_variant: scalar input is a 14-iteration loop that loads an i64 from
; %a[%iv] and stores it to %b. The store address is the bare %b argument, so
; it is the same on every iteration. Both pointer arguments are noalias.
;
; For all three opt invocations the expected output below keeps each lane's
; load together with its store inside one pred.store.if* block, guarded by one
; lane of an `icmp ule <index>, 13` mask. The index advances by 4 per
; vector-loop iteration, the vector loop is left when it equals 16, and
; middle.block branches to for.end on a constant-true condition.
;
; NOTE(review): under the VF2UF2 prefix the FileCheck capture names
; (PRED_STORE_IF2, PRED_STORE_CONTINUE3, ..., PRED_STORE_CONTINUE7) are numbered
; one higher than the literal block labels they branch to (pred.store.if1,
; pred.store.continue2, ..., pred.store.continue6); the other two prefixes are
; consistent. Presumably a stale hand edit - confirm by regenerating the
; assertions with utils/update_test_checks.py.
208define void @load_variant(ptr noalias %a, ptr noalias %b) {
209; CHECK-LABEL: @load_variant(
210; CHECK-NEXT:  entry:
211; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
212; CHECK:       vector.ph:
213; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
214; CHECK:       vector.body:
215; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
216; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
217; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule <4 x i64> [[VEC_IND]], splat (i64 13)
218; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
219; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
220; CHECK:       pred.store.if:
221; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
222; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
223; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
224; CHECK-NEXT:    store i64 [[TMP4]], ptr [[B:%.*]], align 8
225; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
226; CHECK:       pred.store.continue:
227; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
228; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
229; CHECK:       pred.store.if1:
230; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 1
231; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP7]]
232; CHECK-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
233; CHECK-NEXT:    store i64 [[TMP9]], ptr [[B]], align 8
234; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
235; CHECK:       pred.store.continue2:
236; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
237; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
238; CHECK:       pred.store.if3:
239; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 2
240; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP12]]
241; CHECK-NEXT:    [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8
242; CHECK-NEXT:    store i64 [[TMP14]], ptr [[B]], align 8
243; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE4]]
244; CHECK:       pred.store.continue4:
245; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
246; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
247; CHECK:       pred.store.if5:
248; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 3
249; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP17]]
250; CHECK-NEXT:    [[TMP19:%.*]] = load i64, ptr [[TMP18]], align 8
251; CHECK-NEXT:    store i64 [[TMP19]], ptr [[B]], align 8
252; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE6]]
253; CHECK:       pred.store.continue6:
254; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
255; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
256; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
257; CHECK-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
258; CHECK:       middle.block:
259; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
260; CHECK:       scalar.ph:
261; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
262; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
263; CHECK:       for.body:
264; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
265; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
266; CHECK-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
267; CHECK-NEXT:    store i64 [[V]], ptr [[B]], align 8
268; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
269; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
270; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
271; CHECK:       for.end:
272; CHECK-NEXT:    ret void
273;
274; VF2UF2-LABEL: @load_variant(
275; VF2UF2-NEXT:  entry:
276; VF2UF2-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
277; VF2UF2:       vector.ph:
278; VF2UF2-NEXT:    br label [[VECTOR_BODY:%.*]]
279; VF2UF2:       vector.body:
280; VF2UF2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
281; VF2UF2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE7]] ]
282; VF2UF2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
283; VF2UF2-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IND]], splat (i64 13)
284; VF2UF2-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> [[STEP_ADD]], splat (i64 13)
285; VF2UF2-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
286; VF2UF2-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
287; VF2UF2:       pred.store.if:
288; VF2UF2-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
289; VF2UF2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
290; VF2UF2-NEXT:    [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8
291; VF2UF2-NEXT:    store i64 [[TMP5]], ptr [[B:%.*]], align 8
292; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE]]
293; VF2UF2:       pred.store.continue:
294; VF2UF2-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
295; VF2UF2-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
296; VF2UF2:       pred.store.if1:
297; VF2UF2-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
298; VF2UF2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP8]]
299; VF2UF2-NEXT:    [[TMP10:%.*]] = load i64, ptr [[TMP9]], align 8
300; VF2UF2-NEXT:    store i64 [[TMP10]], ptr [[B]], align 8
301; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE3]]
302; VF2UF2:       pred.store.continue2:
303; VF2UF2-NEXT:    [[TMP12:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
304; VF2UF2-NEXT:    br i1 [[TMP12]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
305; VF2UF2:       pred.store.if3:
306; VF2UF2-NEXT:    [[TMP13:%.*]] = add i64 [[INDEX]], 2
307; VF2UF2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP13]]
308; VF2UF2-NEXT:    [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8
309; VF2UF2-NEXT:    store i64 [[TMP15]], ptr [[B]], align 8
310; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
311; VF2UF2:       pred.store.continue4:
312; VF2UF2-NEXT:    [[TMP17:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
313; VF2UF2-NEXT:    br i1 [[TMP17]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
314; VF2UF2:       pred.store.if5:
315; VF2UF2-NEXT:    [[TMP18:%.*]] = add i64 [[INDEX]], 3
316; VF2UF2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP18]]
317; VF2UF2-NEXT:    [[TMP20:%.*]] = load i64, ptr [[TMP19]], align 8
318; VF2UF2-NEXT:    store i64 [[TMP20]], ptr [[B]], align 8
319; VF2UF2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
320; VF2UF2:       pred.store.continue6:
321; VF2UF2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
322; VF2UF2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
323; VF2UF2-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
324; VF2UF2-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
325; VF2UF2:       middle.block:
326; VF2UF2-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
327; VF2UF2:       scalar.ph:
328; VF2UF2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
329; VF2UF2-NEXT:    br label [[FOR_BODY:%.*]]
330; VF2UF2:       for.body:
331; VF2UF2-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
332; VF2UF2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
333; VF2UF2-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
334; VF2UF2-NEXT:    store i64 [[V]], ptr [[B]], align 8
335; VF2UF2-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
336; VF2UF2-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
337; VF2UF2-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
338; VF2UF2:       for.end:
339; VF2UF2-NEXT:    ret void
340;
341; VF1UF4-LABEL: @load_variant(
342; VF1UF4-NEXT:  entry:
343; VF1UF4-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
344; VF1UF4:       vector.ph:
345; VF1UF4-NEXT:    br label [[VECTOR_BODY:%.*]]
346; VF1UF4:       vector.body:
347; VF1UF4-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
348; VF1UF4-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
349; VF1UF4-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
350; VF1UF4-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
351; VF1UF4-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
352; VF1UF4-NEXT:    [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13
353; VF1UF4-NEXT:    [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13
354; VF1UF4-NEXT:    [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13
355; VF1UF4-NEXT:    [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13
356; VF1UF4-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
357; VF1UF4:       pred.store.if:
358; VF1UF4-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
359; VF1UF4-NEXT:    [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8
360; VF1UF4-NEXT:    store i64 [[TMP9]], ptr [[B:%.*]], align 8
361; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE]]
362; VF1UF4:       pred.store.continue:
363; VF1UF4-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
364; VF1UF4:       pred.store.if1:
365; VF1UF4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
366; VF1UF4-NEXT:    [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8
367; VF1UF4-NEXT:    store i64 [[TMP12]], ptr [[B]], align 8
368; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE2]]
369; VF1UF4:       pred.store.continue2:
370; VF1UF4-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
371; VF1UF4:       pred.store.if3:
372; VF1UF4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
373; VF1UF4-NEXT:    [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8
374; VF1UF4-NEXT:    store i64 [[TMP15]], ptr [[B]], align 8
375; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE4]]
376; VF1UF4:       pred.store.continue4:
377; VF1UF4-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
378; VF1UF4:       pred.store.if5:
379; VF1UF4-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
380; VF1UF4-NEXT:    [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8
381; VF1UF4-NEXT:    store i64 [[TMP18]], ptr [[B]], align 8
382; VF1UF4-NEXT:    br label [[PRED_STORE_CONTINUE6]]
383; VF1UF4:       pred.store.continue6:
384; VF1UF4-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
385; VF1UF4-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
386; VF1UF4-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
387; VF1UF4:       middle.block:
388; VF1UF4-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
389; VF1UF4:       scalar.ph:
390; VF1UF4-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
391; VF1UF4-NEXT:    br label [[FOR_BODY:%.*]]
392; VF1UF4:       for.body:
393; VF1UF4-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
394; VF1UF4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
395; VF1UF4-NEXT:    [[V:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
396; VF1UF4-NEXT:    store i64 [[V]], ptr [[B]], align 8
397; VF1UF4-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
398; VF1UF4-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14
399; VF1UF4-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
400; VF1UF4:       for.end:
401; VF1UF4-NEXT:    ret void
402;
; Input IR handed to opt: a single-block loop. %iv starts at 0 and is bumped by
; 1 each iteration; the loop exits as soon as %iv.next equals 14, so the body
; executes for %iv = 0 through 13.
403entry:
404  br label %for.body
405
406for.body:
407  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
408  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
; Neither the load nor the store spells out an alignment here; the expected
; output above prints both with `align 8`.
409  %v = load i64, ptr %arrayidx
; Every iteration writes to the same address: %b is used directly, with no
; per-iteration offset.
410  store i64 %v, ptr %b
411  %iv.next = add nuw nsw i64 %iv, 1
412  %exitcond.not = icmp eq i64 %iv.next, 14
413  br i1 %exitcond.not, label %for.end, label %for.body
414
415for.end:
416  ret void
417}
418