xref: /llvm-project/llvm/test/Transforms/LoopVectorize/reverse_induction.ll (revision 4ad0fdd1631eeae432714c03ede01a10dc00025d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s
3
; Module data layout string. Per the LLVM LangRef, the leading `e` selects
; little-endian, `p:64:64:64` declares 64-bit pointers and `n8:16:32:64` lists
; the native integer widths.
4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
5
6; Make sure consecutive vector accesses generate correct negative indices.
7; PR15882
8
; Reduction over a reverse i64 induction: %add.i7 starts at %startval and is
; decremented by 1 before each load, while the i32 counter %i.06 runs from 0
; until %inc4 reaches 1024. Each iteration loads one i32 from %ptr at the
; decremented index and adds it to the running sum that is returned.
; The autogenerated checks expect two <4 x i32> wide loads per vector
; iteration, each followed by a reversing shufflevector, and a vector index
; that advances by 8.
9define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) {
10; CHECK-LABEL: define i32 @reverse_induction_i64(
11; CHECK-SAME: i64 [[STARTVAL:%.*]], ptr [[PTR:%.*]]) {
12; CHECK-NEXT:  [[ENTRY:.*]]:
13; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
14; CHECK:       [[VECTOR_PH]]:
15; CHECK-NEXT:    [[IND_END:%.*]] = sub i64 [[STARTVAL]], 1024
16; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
17; CHECK:       [[VECTOR_BODY]]:
18; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
19; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
21; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]]
22; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
23; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP0]], -1
24; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]]
25; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
26; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3
27; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4
28; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
29; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
30; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
31; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4
32; CHECK-NEXT:    [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
33; CHECK-NEXT:    [[TMP10]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
34; CHECK-NEXT:    [[TMP11]] = add <4 x i32> [[REVERSE4]], [[VEC_PHI2]]
35; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
36; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
37; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
38; CHECK:       [[MIDDLE_BLOCK]]:
39; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[TMP10]]
40; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
41; CHECK-NEXT:    br i1 true, label %[[LOOPEND:.*]], label %[[SCALAR_PH]]
42; CHECK:       [[SCALAR_PH]]:
43; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[STARTVAL]], %[[ENTRY]] ]
44; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
45; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
46; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
47; CHECK:       [[FOR_BODY]]:
48; CHECK-NEXT:    [[ADD_I7:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
49; CHECK-NEXT:    [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
50; CHECK-NEXT:    [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
51; CHECK-NEXT:    [[ADD_I]] = add i64 [[ADD_I7]], -1
52; CHECK-NEXT:    [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD_I]]
53; CHECK-NEXT:    [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
54; CHECK-NEXT:    [[INC_REDUX]] = add i32 [[TMP_I1]], [[REDUX5]]
55; CHECK-NEXT:    [[INC4]] = add i32 [[I_06]], 1
56; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC4]], 1024
57; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[LOOPEND]], !llvm.loop [[LOOP3:![0-9]+]]
58; CHECK:       [[LOOPEND]]:
59; CHECK-NEXT:    [[INC_REDUX_LCSSA:%.*]] = phi i32 [ [[INC_REDUX]], %[[FOR_BODY]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ]
60; CHECK-NEXT:    ret i32 [[INC_REDUX_LCSSA]]
61;
62entry:
63  br label %for.body
64
65for.body:
66  %add.i7 = phi i64 [ %startval, %entry ], [ %add.i, %for.body ]
67  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
68  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
69  %add.i = add i64 %add.i7, -1
70  %kind_.i = getelementptr inbounds i32, ptr %ptr, i64 %add.i
71  %tmp.i1 = load i32, ptr %kind_.i, align 4
72  %inc.redux = add i32 %tmp.i1, %redux5
73  %inc4 = add i32 %i.06, 1
74  %exitcond = icmp ne i32 %inc4, 1024
75  br i1 %exitcond, label %for.body, label %loopend
76
77loopend:
78  ret i32 %inc.redux
79}
80
81
; Same reduction loop shape as the i64 variant, but the reverse induction
; %add.i7 and the getelementptr index are i128. The i32 counter %i.06 still
; runs from 0 until %inc4 reaches 1024, and each iteration adds one loaded i32
; to the returned sum.
; The autogenerated checks expect the vector index and the induction end value
; to be computed in i128, with two reversed <4 x i32> wide loads per vector
; iteration.
82define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) {
83; CHECK-LABEL: define i32 @reverse_induction_i128(
84; CHECK-SAME: i128 [[STARTVAL:%.*]], ptr [[PTR:%.*]]) {
85; CHECK-NEXT:  [[ENTRY:.*]]:
86; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
87; CHECK:       [[VECTOR_PH]]:
88; CHECK-NEXT:    [[IND_END:%.*]] = sub i128 [[STARTVAL]], 1024
89; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
90; CHECK:       [[VECTOR_BODY]]:
91; CHECK-NEXT:    [[INDEX:%.*]] = phi i128 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
92; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
93; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
94; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]]
95; CHECK-NEXT:    [[TMP0:%.*]] = add i128 [[OFFSET_IDX]], 0
96; CHECK-NEXT:    [[TMP3:%.*]] = add i128 [[TMP0]], -1
97; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]]
98; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
99; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3
100; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4
101; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
102; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
103; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
104; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4
105; CHECK-NEXT:    [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
106; CHECK-NEXT:    [[TMP10]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
107; CHECK-NEXT:    [[TMP11]] = add <4 x i32> [[REVERSE4]], [[VEC_PHI2]]
108; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i128 [[INDEX]], 8
109; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i128 [[INDEX_NEXT]], 1024
110; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
111; CHECK:       [[MIDDLE_BLOCK]]:
112; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP11]], [[TMP10]]
113; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
114; CHECK-NEXT:    br i1 true, label %[[LOOPEND:.*]], label %[[SCALAR_PH]]
115; CHECK:       [[SCALAR_PH]]:
116; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i128 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[STARTVAL]], %[[ENTRY]] ]
117; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
118; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
119; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
120; CHECK:       [[FOR_BODY]]:
121; CHECK-NEXT:    [[ADD_I7:%.*]] = phi i128 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
122; CHECK-NEXT:    [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
123; CHECK-NEXT:    [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
124; CHECK-NEXT:    [[ADD_I]] = add i128 [[ADD_I7]], -1
125; CHECK-NEXT:    [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[ADD_I]]
126; CHECK-NEXT:    [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
127; CHECK-NEXT:    [[INC_REDUX]] = add i32 [[TMP_I1]], [[REDUX5]]
128; CHECK-NEXT:    [[INC4]] = add i32 [[I_06]], 1
129; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC4]], 1024
130; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[LOOPEND]], !llvm.loop [[LOOP5:![0-9]+]]
131; CHECK:       [[LOOPEND]]:
132; CHECK-NEXT:    [[INC_REDUX_LCSSA:%.*]] = phi i32 [ [[INC_REDUX]], %[[FOR_BODY]] ], [ [[TMP13]], %[[MIDDLE_BLOCK]] ]
133; CHECK-NEXT:    ret i32 [[INC_REDUX_LCSSA]]
134;
135entry:
136  br label %for.body
137
138for.body:
139  %add.i7 = phi i128 [ %startval, %entry ], [ %add.i, %for.body ]
140  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
141  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
142  %add.i = add i128 %add.i7, -1
143  %kind_.i = getelementptr inbounds i32, ptr %ptr, i128 %add.i
144  %tmp.i1 = load i32, ptr %kind_.i, align 4
145  %inc.redux = add i32 %tmp.i1, %redux5
146  %inc4 = add i32 %i.06, 1
147  %exitcond = icmp ne i32 %inc4, 1024
148  br i1 %exitcond, label %for.body, label %loopend
149
150loopend:
151  ret i32 %inc.redux
152}
153
154
; Same reduction loop shape as the i64 variant, but the reverse induction
; %add.i7 and the getelementptr index are i16, while the loop counter %i.06
; remains i32 and runs from 0 until %inc4 reaches 1024.
; Unlike the wider variants, the autogenerated checks expect an extra
; VECTOR_SCEVCHECK block: it calls llvm.umul.with.overflow.i16(1, 1023),
; compares the resulting end index against the start index with a signed
; compare, and branches to the scalar loop when either the compare or the
; overflow bit is set. The vector loop index is i32 and is truncated to i16
; before being subtracted from %startval.
155define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
156; CHECK-LABEL: define i32 @reverse_induction_i16(
157; CHECK-SAME: i16 [[STARTVAL:%.*]], ptr [[PTR:%.*]]) {
158; CHECK-NEXT:  [[ENTRY:.*]]:
159; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
160; CHECK:       [[VECTOR_SCEVCHECK]]:
161; CHECK-NEXT:    [[TMP0:%.*]] = add i16 [[STARTVAL]], -1
162; CHECK-NEXT:    [[MUL:%.*]] = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 1, i16 1023)
163; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i16, i1 } [[MUL]], 0
164; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i16, i1 } [[MUL]], 1
165; CHECK-NEXT:    [[TMP1:%.*]] = sub i16 [[TMP0]], [[MUL_RESULT]]
166; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[TMP0]]
167; CHECK-NEXT:    [[TMP3:%.*]] = or i1 [[TMP2]], [[MUL_OVERFLOW]]
168; CHECK-NEXT:    br i1 [[TMP3]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
169; CHECK:       [[VECTOR_PH]]:
170; CHECK-NEXT:    [[IND_END:%.*]] = sub i16 [[STARTVAL]], 1024
171; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
172; CHECK:       [[VECTOR_BODY]]:
173; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
174; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
175; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ]
176; CHECK-NEXT:    [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
177; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]]
178; CHECK-NEXT:    [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 0
179; CHECK-NEXT:    [[TMP7:%.*]] = add i16 [[TMP4]], -1
180; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]]
181; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
182; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3
183; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 -4
184; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 -3
185; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
186; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
187; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
188; CHECK-NEXT:    [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
189; CHECK-NEXT:    [[TMP14]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]]
190; CHECK-NEXT:    [[TMP15]] = add <4 x i32> [[REVERSE4]], [[VEC_PHI2]]
191; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
192; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
193; CHECK-NEXT:    br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
194; CHECK:       [[MIDDLE_BLOCK]]:
195; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP15]], [[TMP14]]
196; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
197; CHECK-NEXT:    br i1 true, label %[[LOOPEND:.*]], label %[[SCALAR_PH]]
198; CHECK:       [[SCALAR_PH]]:
199; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[STARTVAL]], %[[VECTOR_SCEVCHECK]] ], [ [[STARTVAL]], %[[ENTRY]] ]
200; CHECK-NEXT:    [[BC_RESUME_VAL4:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
201; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP17]], %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
202; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
203; CHECK:       [[FOR_BODY]]:
204; CHECK-NEXT:    [[ADD_I7:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD_I:%.*]], %[[FOR_BODY]] ]
205; CHECK-NEXT:    [[I_06:%.*]] = phi i32 [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ], [ [[INC4:%.*]], %[[FOR_BODY]] ]
206; CHECK-NEXT:    [[REDUX5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[INC_REDUX:%.*]], %[[FOR_BODY]] ]
207; CHECK-NEXT:    [[ADD_I]] = add i16 [[ADD_I7]], -1
208; CHECK-NEXT:    [[KIND__I:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[ADD_I]]
209; CHECK-NEXT:    [[TMP_I1:%.*]] = load i32, ptr [[KIND__I]], align 4
210; CHECK-NEXT:    [[INC_REDUX]] = add i32 [[TMP_I1]], [[REDUX5]]
211; CHECK-NEXT:    [[INC4]] = add i32 [[I_06]], 1
212; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[INC4]], 1024
213; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[LOOPEND]], !llvm.loop [[LOOP7:![0-9]+]]
214; CHECK:       [[LOOPEND]]:
215; CHECK-NEXT:    [[INC_REDUX_LCSSA:%.*]] = phi i32 [ [[INC_REDUX]], %[[FOR_BODY]] ], [ [[TMP17]], %[[MIDDLE_BLOCK]] ]
216; CHECK-NEXT:    ret i32 [[INC_REDUX_LCSSA]]
217;
218entry:
219  br label %for.body
220
221for.body:
222  %add.i7 = phi i16 [ %startval, %entry ], [ %add.i, %for.body ]
223  %i.06 = phi i32 [ 0, %entry ], [ %inc4, %for.body ]
224  %redux5 = phi i32 [ 0, %entry ], [ %inc.redux, %for.body ]
225  %add.i = add i16 %add.i7, -1
226  %kind_.i = getelementptr inbounds i32, ptr %ptr, i16 %add.i
227  %tmp.i1 = load i32, ptr %kind_.i, align 4
228  %inc.redux = add i32 %tmp.i1, %redux5
229  %inc4 = add i32 %i.06, 1
230  %exitcond = icmp ne i32 %inc4, 1024
231  br i1 %exitcond, label %for.body, label %loopend
232
233loopend:
234  ret i32 %inc.redux
235}
236
237
; Zero-initialized global array of 1024 i32 elements; it is the store target of
; the reverse_forward_induction_* functions in this file.
238@a = common global [1024 x i32] zeroinitializer, align 16
239
240; The vectorizer used to incorrectly transform this loop into an empty one
241; because it left the induction variable in i8 type and truncated the exit
241; value 1024 to 0.
242; int a[1024];
243;
244; void fail() {
245;   int reverse_induction = 1023;
246;   unsigned char forward_induction = 0;
247;   while ((reverse_induction) >= 0) {
248;     forward_induction++;
249;     a[reverse_induction] = forward_induction;
250;     --reverse_induction;
251;   }
252; }
253
254
; Loop with two inductions running in opposite directions: the i64 index
; %indvars.iv starts at 1023 and is decremented by 1, selecting the element of
; @a that is written, while the i8 counter %forward_induction.05 starts at 0
; and is incremented by 1; the incremented value is zero-extended to i32 and
; stored. The loop continues while the index, truncated to i32, is greater
; than 0 (signed compare on the pre-decrement value).
; The autogenerated checks expect the vector loop to exit when the i64 vector
; index reaches 1024, the i8 counter to be kept as a <4 x i8> vector
; induction, and the stored vectors to be reversed with shufflevector.
255define void @reverse_forward_induction_i64_i8() {
256; CHECK-LABEL: define void @reverse_forward_induction_i64_i8() {
257; CHECK-NEXT:  [[ENTRY:.*]]:
258; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
259; CHECK:       [[VECTOR_PH]]:
260; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
261; CHECK:       [[VECTOR_BODY]]:
262; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
263; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
264; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
265; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
266; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
267; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1)
268; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], splat (i8 1)
269; CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
270; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32>
271; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[TMP0]]
272; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
273; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
274; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -4
275; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3
276; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
277; CHECK-NEXT:    store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4
278; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
279; CHECK-NEXT:    store <4 x i32> [[REVERSE2]], ptr [[TMP11]], align 4
280; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
281; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4)
282; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
283; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
284; CHECK:       [[MIDDLE_BLOCK]]:
285; CHECK-NEXT:    br i1 true, label %[[WHILE_END:.*]], label %[[SCALAR_PH]]
286; CHECK:       [[SCALAR_PH]]:
287; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
288; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i8 [ 0, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
289; CHECK-NEXT:    br label %[[WHILE_BODY:.*]]
290; CHECK:       [[WHILE_BODY]]:
291; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ]
292; CHECK-NEXT:    [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ]
293; CHECK-NEXT:    [[INC]] = add i8 [[FORWARD_INDUCTION_05]], 1
294; CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[INC]] to i32
295; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[INDVARS_IV]]
296; CHECK-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4
297; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1
298; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV]] to i32
299; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 0
300; CHECK-NEXT:    br i1 [[CMP]], label %[[WHILE_BODY]], label %[[WHILE_END]], !llvm.loop [[LOOP9:![0-9]+]]
301; CHECK:       [[WHILE_END]]:
302; CHECK-NEXT:    ret void
303;
304entry:
305  br label %while.body
306
307while.body:
308  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]
309  %forward_induction.05 = phi i8 [ 0, %entry ], [ %inc, %while.body ]
310  %inc = add i8 %forward_induction.05, 1
311  %conv = zext i8 %inc to i32
312  %arrayidx = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 %indvars.iv
313  store i32 %conv, ptr %arrayidx, align 4
314  %indvars.iv.next = add i64 %indvars.iv, -1
315  %0 = trunc i64 %indvars.iv to i32
316  %cmp = icmp sgt i32 %0, 0
317  br i1 %cmp, label %while.body, label %while.end
318
319while.end:
320  ret void
321}
322
323
; Signed counterpart of @reverse_forward_induction_i64_i8: the i64 index
; %indvars.iv again runs from 1023 downward and selects the element of @a that
; is written, but the i8 counter %forward_induction.05 starts at -127 and its
; incremented value is sign-extended (sext) to i32 before the store. The loop
; continues while the index, truncated to i32, is greater than 0 (signed).
; The autogenerated checks expect a <4 x i8> vector induction starting at
; <-127, -126, -125, -124>, sext to <4 x i32>, and a vector loop that exits
; when the i64 vector index reaches 1024.
324define void @reverse_forward_induction_i64_i8_signed() {
325; CHECK-LABEL: define void @reverse_forward_induction_i64_i8_signed() {
326; CHECK-NEXT:  [[ENTRY:.*]]:
327; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
328; CHECK:       [[VECTOR_PH]]:
329; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
330; CHECK:       [[VECTOR_BODY]]:
331; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
332; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 -127, i8 -126, i8 -125, i8 -124>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
333; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 4)
334; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
335; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
336; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i8> [[VEC_IND]], splat (i8 1)
337; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i8> [[STEP_ADD]], splat (i8 1)
338; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
339; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32>
340; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[TMP0]]
341; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
342; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
343; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -4
344; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3
345; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
346; CHECK-NEXT:    store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4
347; CHECK-NEXT:    [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
348; CHECK-NEXT:    store <4 x i32> [[REVERSE2]], ptr [[TMP11]], align 4
349; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
350; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i8> [[STEP_ADD]], splat (i8 4)
351; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
352; CHECK-NEXT:    br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
353; CHECK:       [[MIDDLE_BLOCK]]:
354; CHECK-NEXT:    br i1 true, label %[[WHILE_END:.*]], label %[[SCALAR_PH]]
355; CHECK:       [[SCALAR_PH]]:
356; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ -1, %[[MIDDLE_BLOCK]] ], [ 1023, %[[ENTRY]] ]
357; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i8 [ -127, %[[MIDDLE_BLOCK]] ], [ -127, %[[ENTRY]] ]
358; CHECK-NEXT:    br label %[[WHILE_BODY:.*]]
359; CHECK:       [[WHILE_BODY]]:
360; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[WHILE_BODY]] ]
361; CHECK-NEXT:    [[FORWARD_INDUCTION_05:%.*]] = phi i8 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[WHILE_BODY]] ]
362; CHECK-NEXT:    [[INC]] = add i8 [[FORWARD_INDUCTION_05]], 1
363; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[INC]] to i32
364; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[INDVARS_IV]]
365; CHECK-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4
366; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1
367; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV]] to i32
368; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 0
369; CHECK-NEXT:    br i1 [[CMP]], label %[[WHILE_BODY]], label %[[WHILE_END]], !llvm.loop [[LOOP11:![0-9]+]]
370; CHECK:       [[WHILE_END]]:
371; CHECK-NEXT:    ret void
372;
373entry:
374  br label %while.body
375
376while.body:
377  %indvars.iv = phi i64 [ 1023, %entry ], [ %indvars.iv.next, %while.body ]
378  %forward_induction.05 = phi i8 [ -127, %entry ], [ %inc, %while.body ]
379  %inc = add i8 %forward_induction.05, 1
380  %conv = sext i8 %inc to i32
381  %arrayidx = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 %indvars.iv
382  store i32 %conv, ptr %arrayidx, align 4
383  %indvars.iv.next = add i64 %indvars.iv, -1
384  %0 = trunc i64 %indvars.iv to i32
385  %cmp = icmp sgt i32 %0, 0
386  br i1 %cmp, label %while.body, label %while.end
387
388while.end:
389  ret void
390}
391;.
392; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
393; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
394; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
395; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
396; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
397; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
398; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
399; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
400; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
401; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
402; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
403; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
404;.
405