xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll (revision 7f3428d3ed71d87a2088b77b6cab9f3d86544234)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2; RUN: opt -p loop-vectorize -mtriple=arm64-apple-macosx -S %s | FileCheck %s
3
; Data layout used by every function in this file: little-endian (e), Mach-O
; symbol mangling (m:o), i64 aligned to 64 bits, i128 aligned to 128 bits,
; native integer widths of 32 and 64 bits (n32:64), and a 128-bit natural
; stack alignment (S128).
4target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5
; Loop with two exits and a uniform (loop-invariant) stored value.
;
; Input loop:
;   * %iv counts up from 0 by 1; the header leaves through exit.1 (returning
;     the constant 10) when %iv equals %N.
;   * The latch stores zext(%a) to %dst[%iv], steps a second i32 induction
;     %iv.2 by -1, and leaves through exit.2 (returning %iv.2.next) when the
;     pre-increment %iv equals 2147483648.
;
; Expected output, as pinned by the assertions below:
;   * The trip count is umin(%N, 2147483648) + 1.
;   * The minimum-iteration test uses `ule 8`, and a zero remainder is replaced
;     by 8, so the vector trip count is always smaller than the full trip count
;     and middle.block branches unconditionally to scalar.ph.
;   * The splat of %a and its zext are built once in vector.ph.
;   * vector.body issues two <4 x i64> stores per iteration and advances the
;     index by 8.
;   * %iv.2 appears as two <4 x i32> accumulators stepped by splat(-1) and
;     folded with llvm.vector.reduce.add in middle.block.
6define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) {
7; CHECK-LABEL: define i32 @multi_exit_iv_uniform(
8; CHECK-SAME: i32 [[A:%.*]], i64 [[N:%.*]], ptr [[DST:%.*]]) {
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 2147483648)
11; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[UMIN]], 1
12; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP0]], 8
13; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK:       vector.ph:
15; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 8
16; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
17; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 8, i64 [[N_MOD_VF]]
18; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[TMP2]]
19; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
20; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
21; CHECK-NEXT:    [[TMP7:%.*]] = zext <4 x i32> [[BROADCAST_SPLAT]] to <4 x i64>
22; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
23; CHECK:       vector.body:
24; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
25; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
26; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
27; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
28; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP3]]
29; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
30; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 4
31; CHECK-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP8]], align 8
32; CHECK-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP9]], align 8
33; CHECK-NEXT:    [[TMP10]] = add <4 x i32> [[VEC_PHI]], splat (i32 -1)
34; CHECK-NEXT:    [[TMP11]] = add <4 x i32> [[VEC_PHI1]], splat (i32 -1)
35; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
36; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
37; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
38; CHECK:       middle.block:
39; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[TMP10]]
40; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
41; CHECK-NEXT:    br label [[SCALAR_PH]]
42; CHECK:       scalar.ph:
43; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
44; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
45; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
46; CHECK:       loop.header:
47; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
48; CHECK-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP_LATCH]] ]
49; CHECK-NEXT:    [[C_1:%.*]] = icmp eq i64 [[IV]], [[N]]
50; CHECK-NEXT:    br i1 [[C_1]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]]
51; CHECK:       loop.latch:
52; CHECK-NEXT:    [[ARRAYIDX_I:%.*]] = getelementptr i64, ptr [[DST]], i64 [[IV]]
53; CHECK-NEXT:    [[CONV7:%.*]] = zext i32 [[A]] to i64
54; CHECK-NEXT:    store i64 [[CONV7]], ptr [[ARRAYIDX_I]], align 8
55; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
56; CHECK-NEXT:    [[IV_2_NEXT]] = add i32 [[IV_2]], -1
57; CHECK-NEXT:    [[C_2:%.*]] = icmp eq i64 [[IV]], 2147483648
58; CHECK-NEXT:    br i1 [[C_2]], label [[EXIT_2:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
59; CHECK:       exit.1:
60; CHECK-NEXT:    ret i32 10
61; CHECK:       exit.2:
62; CHECK-NEXT:    [[IV_2_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_2_NEXT]], [[LOOP_LATCH]] ]
63; CHECK-NEXT:    ret i32 [[IV_2_NEXT_LCSSA]]
64;
65entry:
66  br label %loop.header
67
68loop.header:
69  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
70  %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop.latch ]
  ; First exit: leave before storing once %iv reaches %N.
71  %c.1 = icmp eq i64 %iv, %N
72  br i1 %c.1, label %exit.1, label %loop.latch
73
74loop.latch:
75  %arrayidx.i = getelementptr i64, ptr %dst, i64 %iv
  ; %a does not change inside the loop, so the stored value is the same on
  ; every iteration.
76  %conv7 = zext i32 %a to i64
77  store i64 %conv7, ptr %arrayidx.i, align 8
78  %iv.next = add i64 %iv, 1
79  %iv.2.next = add i32 %iv.2, -1
  ; Second exit: compares the pre-increment %iv against a constant bound.
80  %c.2 = icmp eq i64 %iv, 2147483648
81  br i1 %c.2, label %exit.2, label %loop.header
82
83exit.1:
84  ret i32 10
85
86exit.2:
87  ret i32 %iv.2.next
88}
89
; Loop whose only induction variable is a pointer.
;
; Input loop:
;   * %iv starts at %start and advances by 4 bytes per iteration; the loop
;     exits when the pre-increment %iv equals %end, so the element at %end is
;     loaded as well.
;   * Each iteration loads an i32 (align 1) and zero-extends it to i64.
;   * %recur carries the previous iteration's zero-extended value (0 on the
;     first iteration) and is the function's return value.
;
; Expected output, as pinned by the assertions below:
;   * The trip count is ((end - start) >> 2) + 1, and the vector loop is entered
;     only when it is at least 4.
;   * The scalar index advances by 4 per vector iteration and is scaled by 4 to
;     form a byte offset from %start.
;   * The recurrence is a <2 x i64> phi whose initial last lane is 0; it is fed
;     by a single <2 x i32> load at i32 offset 2 from the current pointer.
;   * middle.block extracts lane 0 for the value returned on the direct exit and
;     lane 1 to resume the recurrence in the scalar loop.
90define i64 @pointer_induction_only(ptr %start, ptr %end) {
91; CHECK-LABEL: define i64 @pointer_induction_only(
92; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) {
93; CHECK-NEXT:  entry:
94; CHECK-NEXT:    [[START2:%.*]] = ptrtoint ptr [[START]] to i64
95; CHECK-NEXT:    [[END1:%.*]] = ptrtoint ptr [[END]] to i64
96; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 [[END1]], [[START2]]
97; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
98; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
99; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
100; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
101; CHECK:       vector.ph:
102; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
103; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
104; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
105; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP3]]
106; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
107; CHECK:       vector.body:
108; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
109; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ <i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
110; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
111; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
112; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
113; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 2
114; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1
115; CHECK-NEXT:    [[TMP9]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64>
116; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
117; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
118; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
119; CHECK:       middle.block:
120; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
121; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
122; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
123; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
124; CHECK:       scalar.ph:
125; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY:%.*]] ]
126; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
127; CHECK-NEXT:    br label [[LOOP:%.*]]
128; CHECK:       loop:
129; CHECK-NEXT:    [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
130; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ]
131; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[IV]], align 1
132; CHECK-NEXT:    [[RECUR_NEXT]] = zext i32 [[L]] to i64
133; CHECK-NEXT:    [[IV_NEXT]] = getelementptr inbounds i8, ptr [[IV]], i64 4
134; CHECK-NEXT:    [[C:%.*]] = icmp eq ptr [[IV]], [[END]]
135; CHECK-NEXT:    br i1 [[C]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
136; CHECK:       exit:
137; CHECK-NEXT:    [[RECUR_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
138; CHECK-NEXT:    ret i64 [[RECUR_LCSSA]]
139;
140entry:
141  br label %loop
142
143loop:
144  %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
  ; Holds the value produced by the previous iteration; 0 on the first one.
145  %recur = phi i64 [ 0, %entry ], [ %recur.next, %loop ]
146  %l = load i32, ptr %iv, align 1
147  %recur.next = zext i32 %l to i64
148  %iv.next = getelementptr inbounds i8, ptr %iv, i64 4
  ; The exit test uses the pointer that was just loaded from, not %iv.next.
149  %c = icmp eq ptr %iv, %end
150  br i1 %c, label %exit, label %loop
151
152exit:
153  ret i64 %recur
154}
155
156
; Loop with both an integer and a pointer induction variable and a constant
; trip count.
;
; Input loop:
;   * %iv is an i32 counter from 0; the loop exits when %iv.next equals 1000.
;   * %ptr.iv starts at %start and advances by 4 bytes per iteration.
;   * Each iteration loads an i32 (align 4) and zero-extends it; %recur carries
;     the previous iteration's value (0 initially) and is returned.
;   * The %N argument is not referenced by the loop body.
;
; Expected output, as pinned by the assertions below:
;   * The entry guard is the constant `br i1 false`, and middle.block leaves via
;     `br i1 true` straight to the exit block.
;   * vector.body advances the index by 8 until it reaches 1000 and keeps a
;     <4 x i64> recurrence fed by one <4 x i32> load at i32 offset 4.
;   * middle.block extracts lane 2 for the returned value and lane 3 for the
;     scalar-loop resume value.
157define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
158; CHECK-LABEL: define i64 @int_and_pointer_iv(
159; CHECK-SAME: ptr [[START:%.*]], i32 [[N:%.*]]) {
160; CHECK-NEXT:  entry:
161; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
162; CHECK:       vector.ph:
163; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 4000
164; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
165; CHECK:       vector.body:
166; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
167; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
168; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
169; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
170; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP0]]
171; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
172; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
173; CHECK-NEXT:    [[TMP5]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64>
174; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
175; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
176; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
177; CHECK:       middle.block:
178; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP5]], i32 2
179; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3
180; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
181; CHECK:       scalar.ph:
182; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
183; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START]], [[ENTRY]] ]
184; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
185; CHECK-NEXT:    br label [[LOOP:%.*]]
186; CHECK:       loop:
187; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
188; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
189; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[RECUR_NEXT:%.*]], [[LOOP]] ]
190; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4
191; CHECK-NEXT:    [[RECUR_NEXT]] = zext i32 [[L]] to i64
192; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4
193; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
194; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], 1000
195; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
196; CHECK:       exit:
197; CHECK-NEXT:    [[RECUR_LCSSA:%.*]] = phi i64 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
198; CHECK-NEXT:    ret i64 [[RECUR_LCSSA]]
199;
200entry:
201  br label %loop
202
203loop:
204  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
205  %ptr.iv = phi ptr [ %start, %entry ], [ %ptr.iv.next, %loop ]
  ; Holds the value produced by the previous iteration; 0 on the first one.
206  %recur = phi i64 [ 0, %entry ], [ %recur.next, %loop ]
207  %l = load i32, ptr %ptr.iv, align 4
208  %recur.next = zext i32 %l to i64
209  %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 4
210  %iv.next = add i32 %iv, 1
  ; Constant bound: the loop body runs exactly 1000 times.
211  %tobool.not = icmp eq i32 %iv.next, 1000
212  br i1 %tobool.not, label %exit, label %loop
213
214exit:
215  ret i64 %recur
216}
217
; Loop that stores a truncated copy of a wide induction variable.
;
; Input loop:
;   * %iv is an i64 counter from 0; each iteration truncates it to i8 and
;     stores that byte to %dst[%iv].
;   * The exit test compares the pre-increment %iv against 200 after the store,
;     so the body runs for %iv = 0..200 (201 iterations).
;
; Expected output, as pinned by the assertions below:
;   * A main vector loop with an <8 x i8> induction vector that issues two
;     <8 x i8> stores per iteration and advances the index by 16 until 192.
;   * An epilogue vector loop that resumes at 192 (or 0), issues one <8 x i8>
;     store per iteration, and advances the index by 8 until 200.
;   * A scalar loop whose resume value is 200, 192 or 0 depending on the
;     incoming edge.
;
; NOTE(review): in this function the autogenerated capture names (for example
; [[LOOP]] as the successor of vec.epilog.ph, or [[SCALAR_PH]] in the first
; branch) do not mirror the block labels that follow them; they are pattern
; variables only -- confirm against the generator before renaming anything.
218define void @wide_truncated_iv(ptr %dst) {
219; CHECK-LABEL: define void @wide_truncated_iv(
220; CHECK-SAME: ptr [[DST:%.*]]) {
221; CHECK-NEXT:  iter.check:
222; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
223; CHECK:       vector.main.loop.iter.check:
224; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH1:%.*]]
225; CHECK:       vector.ph:
226; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
227; CHECK:       vector.body:
228; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
229; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
230; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
231; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
232; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]]
233; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP2]], i32 0
234; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 8
235; CHECK-NEXT:    store <8 x i8> [[VEC_IND]], ptr [[TMP4]], align 1
236; CHECK-NEXT:    store <8 x i8> [[STEP_ADD]], ptr [[TMP5]], align 1
237; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
238; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <8 x i8> [[STEP_ADD]], splat (i8 8)
239; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 192
240; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
241; CHECK:       middle.block:
242; CHECK-NEXT:    br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
243; CHECK:       vec.epilog.iter.check:
244; CHECK-NEXT:    br i1 false, label [[SCALAR_PH]], label [[VEC_EPILOG_PH]]
245; CHECK:       vec.epilog.ph:
246; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 192, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_PH]] ]
247; CHECK-NEXT:    [[TMP7:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i8
248; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP7]], i64 0
249; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
250; CHECK-NEXT:    [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
251; CHECK-NEXT:    br label [[LOOP:%.*]]
252; CHECK:       vec.epilog.vector.body:
253; CHECK-NEXT:    [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], [[LOOP]] ]
254; CHECK-NEXT:    [[VEC_IND4:%.*]] = phi <8 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[LOOP]] ]
255; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX3]], 0
256; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
257; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
258; CHECK-NEXT:    store <8 x i8> [[VEC_IND4]], ptr [[TMP10]], align 1
259; CHECK-NEXT:    [[INDEX_NEXT7]] = add nuw i64 [[INDEX3]], 8
260; CHECK-NEXT:    [[VEC_IND_NEXT6]] = add <8 x i8> [[VEC_IND4]], splat (i8 8)
261; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 200
262; CHECK-NEXT:    br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
263; CHECK:       vec.epilog.middle.block:
264; CHECK-NEXT:    br i1 false, label [[EXIT]], label [[SCALAR_PH]]
265; CHECK:       vec.epilog.scalar.ph:
266; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i64 [ 200, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ 192, [[VEC_EPILOG_ITER_CHECK]] ]
267; CHECK-NEXT:    br label [[LOOP1:%.*]]
268; CHECK:       loop:
269; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP1]] ]
270; CHECK-NEXT:    [[TRUNC_IV:%.*]] = trunc i64 [[IV]] to i8
271; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
272; CHECK-NEXT:    store i8 [[TRUNC_IV]], ptr [[GEP]], align 1
273; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
274; CHECK-NEXT:    [[C:%.*]] = icmp eq i64 [[IV]], 200
275; CHECK-NEXT:    br i1 [[C]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP10:![0-9]+]]
276; CHECK:       exit:
277; CHECK-NEXT:    ret void
278;
279entry:
280  br label %loop
281
282loop:
283  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Only the low 8 bits of the 64-bit counter are stored.
284  %trunc.iv = trunc i64 %iv to i8
285  %gep = getelementptr i8, ptr %dst, i64 %iv
286  store i8 %trunc.iv, ptr %gep, align 1
287  %iv.next = add i64 %iv, 1
  ; Compares the pre-increment value, so index 200 is still written.
288  %c = icmp eq i64 %iv, 200
289  br i1 %c, label %exit, label %loop
290
291exit:
292  ret void
293}
294
; Loop mixing a pointer induction, two integer inductions and a first-order
; recurrence.
;
; Input loop:
;   * %ptr.iv starts at %src and advances by 4 bytes per iteration.
;   * %iv.1 and %iv.2 are both i32 counters starting at 0 with step 1; %iv.1
;     feeds the shift amount, %iv.2.next is compared against %N for the exit.
;   * Each iteration loads an i32, xors it with 1, zero-extends the result and
;     shifts it left by zext(%iv.1).
;   * %p carries the previous iteration's shift result (0 initially) and is the
;     function's return value.
;
; Expected output, as pinned by the assertions below:
;   * The trip count is zext(%N - 1) + 1, and the vector loop is entered only
;     when it is at least 8.
;   * vector.body keeps a <4 x i32> induction vector plus a STEP_ADD copy,
;     advances the scalar index by 8, and computes the shift only from the
;     load at i32 offset 4 and the STEP_ADD vector.
;   * vector.ph materialises one pointer end value and two i32 end values (one
;     per integer induction) for the scalar-loop resume phis.
;   * middle.block extracts lane 2 for the returned value and lane 3 for the
;     recurrence resume value.
295define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
296; CHECK-LABEL: define i64 @test_ptr_ivs_and_widened_ivs(
297; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) {
298; CHECK-NEXT:  entry:
299; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
300; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
301; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
302; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
303; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
304; CHECK:       vector.ph:
305; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
306; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
307; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], 4
308; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP3]]
309; CHECK-NEXT:    [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32
310; CHECK-NEXT:    [[IND_END3:%.*]] = trunc i64 [[N_VEC]] to i32
311; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
312; CHECK:       vector.body:
313; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
314; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
315; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
316; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
317; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
318; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
319; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
320; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
321; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
322; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[WIDE_LOAD]], splat (i32 1)
323; CHECK-NEXT:    [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64>
324; CHECK-NEXT:    [[TMP9:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64>
325; CHECK-NEXT:    [[TMP10]] = shl <4 x i64> [[TMP8]], [[TMP9]]
326; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
327; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
328; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
329; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
330; CHECK:       middle.block:
331; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP10]], i32 2
332; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP10]], i32 3
333; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
334; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
335; CHECK:       scalar.ph:
336; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
337; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[SRC]], [[ENTRY]] ]
338; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
339; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
340; CHECK-NEXT:    br label [[LOOP:%.*]]
341; CHECK:       loop:
342; CHECK-NEXT:    [[P:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[SHL:%.*]], [[LOOP]] ]
343; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
344; CHECK-NEXT:    [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
345; CHECK-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
346; CHECK-NEXT:    [[L:%.*]] = load i32, ptr [[PTR_IV]], align 4
347; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[L]], 1
348; CHECK-NEXT:    [[NOT_EXT:%.*]] = zext i32 [[NOT]] to i64
349; CHECK-NEXT:    [[IV_EXT:%.*]] = zext i32 [[IV_1]] to i64
350; CHECK-NEXT:    [[SHL]] = shl i64 [[NOT_EXT]], [[IV_EXT]]
351; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 4
352; CHECK-NEXT:    [[IV_1_NEXT]] = add i32 [[IV_1]], 1
353; CHECK-NEXT:    [[IV_2_NEXT]] = add i32 [[IV_2]], 1
354; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_2_NEXT]], [[N]]
355; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
356; CHECK:       exit:
357; CHECK-NEXT:    [[P_LCSSA:%.*]] = phi i64 [ [[P]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
358; CHECK-NEXT:    ret i64 [[P_LCSSA]]
359;
360entry:
361  br label %loop
362
363loop:
  ; Holds the shift result of the previous iteration; 0 on the first one.
364  %p = phi i64 [ 0, %entry ], [ %shl, %loop ]
365  %ptr.iv = phi ptr [ %src, %entry ], [ %ptr.iv.next, %loop ]
366  %iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop ]
367  %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ]
368  %l = load i32, ptr %ptr.iv, align 4
  ; Despite the name, this flips only bit 0 (xor with 1, not with -1).
369  %not = xor i32 %l, 1
370  %not.ext = zext i32 %not to i64
371  %iv.ext = zext i32 %iv.1 to i64
372  %shl = shl i64 %not.ext , %iv.ext
373  %ptr.iv.next = getelementptr i8, ptr %ptr.iv, i64 4
374  %iv.1.next = add i32 %iv.1, 1
375  %iv.2.next = add i32 %iv.2, 1
376  %ec = icmp eq i32 %iv.2.next, %N
377  br i1 %ec, label %exit, label %loop
378
379exit:
380  ret i64 %p
381}
382
; Loop whose wide index is the zero-extension of a narrow induction's increment.
;
; Input loop:
;   * %iv is an i32 counter from 0 with step 1.
;   * %iv.wide is an i64 phi that starts at 0 and takes zext(%iv.next) on the
;     back edge; it indexes an array of { i32, i32, i32 } at %dst.
;   * Each iteration stores i32 0 to field 2 of the indexed element.
;   * The loop continues while zext(%iv.next) is unsigned-less-than %N.
;
; Expected output, as pinned by the assertions below:
;   * The trip count is umax(%N, 1), and the vector path is entered only when
;     it is at least 2.
;   * A vector.scevcheck block falls back to the scalar loop when
;     trunc(umax(%N, 1) - 1) + 1 wraps in i32 or when umax(%N, 1) - 1 exceeds
;     4294967295.
;   * vector.body performs two scalar i32 stores per iteration, addressed from
;     INDEX + 0 and INDEX + 1, and advances the index by 2.
;   * The scalar resume phis have three incoming edges: middle.block,
;     vector.scevcheck and entry.
383define void @zext_iv_increment(ptr %dst, i64 %N) {
384; CHECK-LABEL: define void @zext_iv_increment(
385; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
386; CHECK-NEXT:  entry:
387; CHECK-NEXT:    [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
388; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
389; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
390; CHECK:       vector.scevcheck:
391; CHECK-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
392; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[UMAX]], -1
393; CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
394; CHECK-NEXT:    [[TMP3:%.*]] = add i32 1, [[TMP2]]
395; CHECK-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1
396; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
397; CHECK-NEXT:    [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
398; CHECK-NEXT:    br i1 [[TMP6]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
399; CHECK:       vector.ph:
400; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
401; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
402; CHECK-NEXT:    [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
403; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
404; CHECK:       vector.body:
405; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
406; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
407; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
408; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP7]], i32 2
409; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2
410; CHECK-NEXT:    store i32 0, ptr [[TMP9]], align 8
411; CHECK-NEXT:    store i32 0, ptr [[TMP10]], align 8
412; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
413; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
414; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
415; CHECK:       middle.block:
416; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
417; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
418; CHECK:       scalar.ph:
419; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ]
420; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ]
421; CHECK-NEXT:    br label [[LOOP:%.*]]
422; CHECK:       loop:
423; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
424; CHECK-NEXT:    [[IV_WIDE:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[IV_NEXT_EXT:%.*]], [[LOOP]] ]
425; CHECK-NEXT:    [[PATCH_INDEX:%.*]] = getelementptr { i32, i32, i32 }, ptr [[DST]], i64 [[IV_WIDE]], i32 2
426; CHECK-NEXT:    store i32 0, ptr [[PATCH_INDEX]], align 8
427; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
428; CHECK-NEXT:    [[IV_NEXT_EXT]] = zext i32 [[IV_NEXT]] to i64
429; CHECK-NEXT:    [[EC:%.*]] = icmp ult i64 [[IV_NEXT_EXT]], [[N]]
430; CHECK-NEXT:    br i1 [[EC]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP14:![0-9]+]]
431; CHECK:       exit:
432; CHECK-NEXT:    ret void
433;
434entry:
435  br label %loop
436
437loop:
438  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
  ; Wide copy of the counter: fed by the zero-extended i32 increment below.
439  %iv.wide = phi i64 [ 0, %entry ], [ %iv.next.ext, %loop ]
  ; Addresses field 2 of the %iv.wide-th { i32, i32, i32 } element.
440  %patch_index = getelementptr { i32, i32, i32 }, ptr %dst, i64 %iv.wide, i32 2
441  store i32 0, ptr %patch_index, align 8
442  %iv.next = add i32 %iv, 1
443  %iv.next.ext = zext i32 %iv.next to i64
444  %ec = icmp ult i64 %iv.next.ext, %N
445  br i1 %ec, label %loop, label %exit
446
447exit:
448  ret void
449}
450
; Expected loop metadata, bound through the !llvm.loop captures in the
; functions above. Every listed loop carries "llvm.loop.isvectorized"; all of
; them except LOOP14 (the scalar loop of @zext_iv_increment) also carry
; "llvm.loop.unroll.runtime.disable".
451;.
452; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
453; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
454; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
455; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
456; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
457; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
458; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
459; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
460; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
461; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
462; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]}
463; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]}
464; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]}
465; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]], [[META2]]}
466; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]]}
467;.
468