xref: /llvm-project/llvm/test/Transforms/LoopVectorize/optsize.ll (revision 4ad0fdd1631eeae432714c03ede01a10dc00025d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; This test verifies that the loop vectorizer will NOT produce a tail
3; loop in functions that carry the optsize or the minsize attribute.
4; REQUIRES: asserts
5; RUN: opt < %s -passes='require<profile-summary>,loop-vectorize' -S | FileCheck %s
6; RUN: opt < %s -passes='require<profile-summary>,loop-vectorize' -pgso -S | FileCheck %s -check-prefix=PGSO
7; RUN: opt < %s -passes='require<profile-summary>,loop-vectorize' -pgso=false -S | FileCheck %s -check-prefix=NPGSO
8
; Module data layout: big-endian ("E") with 32-bit pointers ("p:32:32").
9target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S128"
10
; 32-byte zero-initialized array that the foo_* loops in this file load from
; and store to.
11@tab = common global [32 x i8] zeroinitializer, align 1
12
; foo_optsize: scalar byte loop over @tab in a function that uses attribute
; group #0 (optsize, declared right after the function). Each iteration loads
; one byte, stores 2 if it was zero and 1 otherwise, and the loop exits once
; %i.08 equals 202. The assertions for all three FileCheck prefixes expect
; the loop-vectorizer to leave the IR as the single original scalar loop.
; NOTE(review): @tab has 32 elements while the index runs up to 202;
; presumably irrelevant to what this test checks - confirm.
13define i32 @foo_optsize() #0 {
14; CHECK-LABEL: define i32 @foo_optsize(
15; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
16; CHECK-NEXT:  [[ENTRY:.*]]:
17; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
18; CHECK:       [[FOR_BODY]]:
19; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
20; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
21; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
22; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
23; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
24; CHECK-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
25; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
26; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
27; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
28; CHECK:       [[FOR_END]]:
29; CHECK-NEXT:    ret i32 0
30;
31; PGSO-LABEL: define i32 @foo_optsize(
32; PGSO-SAME: ) #[[ATTR0:[0-9]+]] {
33; PGSO-NEXT:  [[ENTRY:.*]]:
34; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
35; PGSO:       [[FOR_BODY]]:
36; PGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
37; PGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
38; PGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
39; PGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
40; PGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
41; PGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
42; PGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
43; PGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
44; PGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
45; PGSO:       [[FOR_END]]:
46; PGSO-NEXT:    ret i32 0
47;
48; NPGSO-LABEL: define i32 @foo_optsize(
49; NPGSO-SAME: ) #[[ATTR0:[0-9]+]] {
50; NPGSO-NEXT:  [[ENTRY:.*]]:
51; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
52; NPGSO:       [[FOR_BODY]]:
53; NPGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
54; NPGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
55; NPGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
56; NPGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
57; NPGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
58; NPGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
59; NPGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
60; NPGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
61; NPGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
62; NPGSO:       [[FOR_END]]:
63; NPGSO-NEXT:    ret i32 0
64;
65
66entry:
67  br label %for.body
68
69for.body:                                         ; preds = %for.body, %entry
70  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
71  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
72  %0 = load i8, ptr %arrayidx, align 1
73  %cmp1 = icmp eq i8 %0, 0
74  %. = select i1 %cmp1, i8 2, i8 1
75  store i8 %., ptr %arrayidx, align 1
76  %inc = add nsw i32 %i.08, 1
; The exit test compares the pre-increment index, so the body also runs for
; %i.08 == 202 before leaving the loop.
77  %exitcond = icmp eq i32 %i.08, 202
78  br i1 %exitcond, label %for.end, label %for.body
79
80for.end:                                          ; preds = %for.body
81  ret i32 0
82}
83
; Attribute group used by @foo_optsize.
84attributes #0 = { optsize }
85
; foo_minsize: the same scalar byte loop over @tab as @foo_optsize, but the
; function uses attribute group #1 (minsize, declared right after the
; function). The assertions for all three FileCheck prefixes expect the
; loop-vectorizer to leave the IR as the single original scalar loop.
86define i32 @foo_minsize() #1 {
87; CHECK-LABEL: define i32 @foo_minsize(
88; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
89; CHECK-NEXT:  [[ENTRY:.*]]:
90; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
91; CHECK:       [[FOR_BODY]]:
92; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
93; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
94; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
95; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
96; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
97; CHECK-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
98; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
99; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
100; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
101; CHECK:       [[FOR_END]]:
102; CHECK-NEXT:    ret i32 0
103;
104; PGSO-LABEL: define i32 @foo_minsize(
105; PGSO-SAME: ) #[[ATTR1:[0-9]+]] {
106; PGSO-NEXT:  [[ENTRY:.*]]:
107; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
108; PGSO:       [[FOR_BODY]]:
109; PGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
110; PGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
111; PGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
112; PGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
113; PGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
114; PGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
115; PGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
116; PGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
117; PGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
118; PGSO:       [[FOR_END]]:
119; PGSO-NEXT:    ret i32 0
120;
121; NPGSO-LABEL: define i32 @foo_minsize(
122; NPGSO-SAME: ) #[[ATTR1:[0-9]+]] {
123; NPGSO-NEXT:  [[ENTRY:.*]]:
124; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
125; NPGSO:       [[FOR_BODY]]:
126; NPGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
127; NPGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
128; NPGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
129; NPGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
130; NPGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
131; NPGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
132; NPGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
133; NPGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
134; NPGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
135; NPGSO:       [[FOR_END]]:
136; NPGSO-NEXT:    ret i32 0
137;
138
139entry:
140  br label %for.body
141
142for.body:                                         ; preds = %for.body, %entry
143  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
144  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
145  %0 = load i8, ptr %arrayidx, align 1
146  %cmp1 = icmp eq i8 %0, 0
147  %. = select i1 %cmp1, i8 2, i8 1
148  store i8 %., ptr %arrayidx, align 1
149  %inc = add nsw i32 %i.08, 1
; The exit test compares the pre-increment index, so the body also runs for
; %i.08 == 202 before leaving the loop.
150  %exitcond = icmp eq i32 %i.08, 202
151  br i1 %exitcond, label %for.end, label %for.body
152
153for.end:                                          ; preds = %for.body
154  ret i32 0
155}
156
; Attribute group used by @foo_minsize.
157attributes #1 = { minsize }
158
; foo_pgso: the same scalar byte loop over @tab as @foo_optsize and
; @foo_minsize, but with no size attribute; the function carries !prof !14
; instead. For the default and -pgso invocations the assertions expect the
; loop to stay scalar. For the -pgso=false invocation (prefix NPGSO) they
; expect a <4 x i8> vector loop that stops at index 200, followed by the
; original scalar loop resuming at 200 for the remaining iterations.
; NOTE(review): !14 is defined outside this view; presumably it marks the
; function as cold for the profile summary - confirm.
159define i32 @foo_pgso() !prof !14 {
160; CHECK-LABEL: define i32 @foo_pgso(
161; CHECK-SAME: ) !prof [[PROF14:![0-9]+]] {
162; CHECK-NEXT:  [[ENTRY:.*]]:
163; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
164; CHECK:       [[FOR_BODY]]:
165; CHECK-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
166; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
167; CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
168; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
169; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
170; CHECK-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
171; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
172; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
173; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
174; CHECK:       [[FOR_END]]:
175; CHECK-NEXT:    ret i32 0
176;
177; PGSO-LABEL: define i32 @foo_pgso(
178; PGSO-SAME: ) !prof [[PROF14:![0-9]+]] {
179; PGSO-NEXT:  [[ENTRY:.*]]:
180; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
181; PGSO:       [[FOR_BODY]]:
182; PGSO-NEXT:    [[I_08:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
183; PGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
184; PGSO-NEXT:    [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
185; PGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP0]], 0
186; PGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
187; PGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
188; PGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
189; PGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
190; PGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
191; PGSO:       [[FOR_END]]:
192; PGSO-NEXT:    ret i32 0
193;
194; NPGSO-LABEL: define i32 @foo_pgso(
195; NPGSO-SAME: ) !prof [[PROF14:![0-9]+]] {
196; NPGSO-NEXT:  [[ENTRY:.*]]:
197; NPGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
198; NPGSO:       [[VECTOR_PH]]:
199; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
200; NPGSO:       [[VECTOR_BODY]]:
201; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
202; NPGSO-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
203; NPGSO-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[TMP0]]
204; NPGSO-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
205; NPGSO-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
206; NPGSO-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer
207; NPGSO-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i8> splat (i8 2), <4 x i8> splat (i8 1)
208; NPGSO-NEXT:    store <4 x i8> [[TMP4]], ptr [[TMP2]], align 1
209; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
210; NPGSO-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 200
211; NPGSO-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
212; NPGSO:       [[MIDDLE_BLOCK]]:
213; NPGSO-NEXT:    br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
214; NPGSO:       [[SCALAR_PH]]:
215; NPGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 200, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
216; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
217; NPGSO:       [[FOR_BODY]]:
218; NPGSO-NEXT:    [[I_08:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ]
219; NPGSO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 [[I_08]]
220; NPGSO-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
221; NPGSO-NEXT:    [[CMP1:%.*]] = icmp eq i8 [[TMP6]], 0
222; NPGSO-NEXT:    [[DOT:%.*]] = select i1 [[CMP1]], i8 2, i8 1
223; NPGSO-NEXT:    store i8 [[DOT]], ptr [[ARRAYIDX]], align 1
224; NPGSO-NEXT:    [[INC]] = add nsw i32 [[I_08]], 1
225; NPGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[I_08]], 202
226; NPGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
227; NPGSO:       [[FOR_END]]:
228; NPGSO-NEXT:    ret i32 0
229;
230
231entry:
232  br label %for.body
233
234for.body:                                         ; preds = %for.body, %entry
235  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
236  %arrayidx = getelementptr inbounds [32 x i8], ptr @tab, i32 0, i32 %i.08
237  %0 = load i8, ptr %arrayidx, align 1
238  %cmp1 = icmp eq i8 %0, 0
239  %. = select i1 %cmp1, i8 2, i8 1
240  store i8 %., ptr %arrayidx, align 1
241  %inc = add nsw i32 %i.08, 1
; The exit test compares the pre-increment index, so the body also runs for
; %i.08 == 202 before leaving the loop.
242  %exitcond = icmp eq i32 %i.08, 202
243  br i1 %exitcond, label %for.end, label %for.body
244
245for.end:                                          ; preds = %for.body
246  ret i32 0
247}
248
249; PR43371: don't run into an assert due to emitting SCEV runtime checks
250; with OptForSize.
251;
; External array of 2592 i16 elements, declared with alignment 1; it is the
; store target of the pr43371 loops.
252@cm_array = external global [2592 x i16], align 1
253
; pr43371: optsize function whose loop stores i16 0 into @cm_array at index
; zext(undef + %i24.0170), with the i16 induction variable running from 0
; while the incremented value is below 756. The assertions for all three
; FileCheck prefixes expect the same shape: a vector loop of width 2 whose
; stores are scalarized (one extractelement, getelementptr and scalar store
; per lane), with no vector.scevcheck block ahead of it.
254define void @pr43371() optsize {
255;
256; CHECK-LABEL: define void @pr43371(
257; CHECK-SAME: ) #[[ATTR0]] {
258; CHECK-NEXT:  [[ENTRY:.*]]:
259; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
260; CHECK:       [[VECTOR_PH]]:
261; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
262; CHECK:       [[VECTOR_BODY]]:
263; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
264; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
265; CHECK-NEXT:    [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]]
266; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
267; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
268; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
269; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
270; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
271; CHECK-NEXT:    store i16 0, ptr [[TMP3]], align 1
272; CHECK-NEXT:    store i16 0, ptr [[TMP5]], align 1
273; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
274; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
275; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
276; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
277; CHECK:       [[MIDDLE_BLOCK]]:
278; CHECK-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]]
279; CHECK:       [[SCALAR_PH]]:
280; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
281; CHECK-NEXT:    br label %[[FOR_BODY29:.*]]
282; CHECK:       [[FOR_COND_CLEANUP28]]:
283; CHECK-NEXT:    unreachable
284; CHECK:       [[FOR_BODY29]]:
285; CHECK-NEXT:    [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
286; CHECK-NEXT:    [[ADD33:%.*]] = add i16 undef, [[I24_0170]]
287; CHECK-NEXT:    [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
288; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
289; CHECK-NEXT:    store i16 0, ptr [[ARRAYIDX35]], align 1
290; CHECK-NEXT:    [[INC37]] = add i16 [[I24_0170]], 1
291; CHECK-NEXT:    [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
292; CHECK-NEXT:    br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP18:![0-9]+]]
293;
294; PGSO-LABEL: define void @pr43371(
295; PGSO-SAME: ) #[[ATTR0]] {
296; PGSO-NEXT:  [[ENTRY:.*]]:
297; PGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
298; PGSO:       [[VECTOR_PH]]:
299; PGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
300; PGSO:       [[VECTOR_BODY]]:
301; PGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
302; PGSO-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
303; PGSO-NEXT:    [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]]
304; PGSO-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
305; PGSO-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
306; PGSO-NEXT:    [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
307; PGSO-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
308; PGSO-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
309; PGSO-NEXT:    store i16 0, ptr [[TMP3]], align 1
310; PGSO-NEXT:    store i16 0, ptr [[TMP5]], align 1
311; PGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
312; PGSO-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
313; PGSO-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
314; PGSO-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
315; PGSO:       [[MIDDLE_BLOCK]]:
316; PGSO-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]]
317; PGSO:       [[SCALAR_PH]]:
318; PGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
319; PGSO-NEXT:    br label %[[FOR_BODY29:.*]]
320; PGSO:       [[FOR_COND_CLEANUP28]]:
321; PGSO-NEXT:    unreachable
322; PGSO:       [[FOR_BODY29]]:
323; PGSO-NEXT:    [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
324; PGSO-NEXT:    [[ADD33:%.*]] = add i16 undef, [[I24_0170]]
325; PGSO-NEXT:    [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
326; PGSO-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
327; PGSO-NEXT:    store i16 0, ptr [[ARRAYIDX35]], align 1
328; PGSO-NEXT:    [[INC37]] = add i16 [[I24_0170]], 1
329; PGSO-NEXT:    [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
330; PGSO-NEXT:    br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP18:![0-9]+]]
331;
332; NPGSO-LABEL: define void @pr43371(
333; NPGSO-SAME: ) #[[ATTR0]] {
334; NPGSO-NEXT:  [[ENTRY:.*]]:
335; NPGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
336; NPGSO:       [[VECTOR_PH]]:
337; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
338; NPGSO:       [[VECTOR_BODY]]:
339; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
340; NPGSO-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
341; NPGSO-NEXT:    [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]]
342; NPGSO-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
343; NPGSO-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
344; NPGSO-NEXT:    [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
345; NPGSO-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
346; NPGSO-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
347; NPGSO-NEXT:    store i16 0, ptr [[TMP3]], align 1
348; NPGSO-NEXT:    store i16 0, ptr [[TMP5]], align 1
349; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
350; NPGSO-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
351; NPGSO-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
352; NPGSO-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
353; NPGSO:       [[MIDDLE_BLOCK]]:
354; NPGSO-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]]
355; NPGSO:       [[SCALAR_PH]]:
356; NPGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
357; NPGSO-NEXT:    br label %[[FOR_BODY29:.*]]
358; NPGSO:       [[FOR_COND_CLEANUP28]]:
359; NPGSO-NEXT:    unreachable
360; NPGSO:       [[FOR_BODY29]]:
361; NPGSO-NEXT:    [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
362; NPGSO-NEXT:    [[ADD33:%.*]] = add i16 undef, [[I24_0170]]
363; NPGSO-NEXT:    [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
364; NPGSO-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
365; NPGSO-NEXT:    store i16 0, ptr [[ARRAYIDX35]], align 1
366; NPGSO-NEXT:    [[INC37]] = add i16 [[I24_0170]], 1
367; NPGSO-NEXT:    [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
368; NPGSO-NEXT:    br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP20:![0-9]+]]
369;
370; We do not want to generate SCEV predicates when optimising for size, because
371; that will lead to extra code generation such as the SCEV overflow runtime
372; checks. Not generating SCEV predicates can still result in vectorisation as
373; the non-consecutive loads/stores can be scalarized:
374entry:
375  br label %for.body29
376
; The loop's only exit leads here, and the block ends in unreachable.
377for.cond.cleanup28:
378  unreachable
379
380for.body29:
381  %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
; The i16 sum is computed first and only then widened to i32 for indexing.
382  %add33 = add i16 undef, %i24.0170
383  %idxprom34 = zext i16 %add33 to i32
384  %arrayidx35 = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 %idxprom34
385  store i16 0, ptr %arrayidx35, align 1
386  %inc37 = add i16 %i24.0170, 1
387  %cmp26 = icmp ult i16 %inc37, 756
388  br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
389}
390
; pr43371_pgso: the same loop as @pr43371, but the function carries
; !prof !14 instead of the optsize attribute. For the default and -pgso
; invocations the assertions expect the scalarized width-2 vector loop with
; no vector.scevcheck block. For the -pgso=false invocation (prefix NPGSO)
; they expect a vector.scevcheck block in front of a vector loop that uses a
; single wide <2 x i16> store.
; NOTE(review): !14 is defined outside this view; presumably it marks the
; function as cold for the profile summary - confirm.
391define void @pr43371_pgso() !prof !14 {
392;
393; CHECK-LABEL: define void @pr43371_pgso(
394; CHECK-SAME: ) !prof [[PROF14]] {
395; CHECK-NEXT:  [[ENTRY:.*]]:
396; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
397; CHECK:       [[VECTOR_PH]]:
398; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
399; CHECK:       [[VECTOR_BODY]]:
400; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
401; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
402; CHECK-NEXT:    [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]]
403; CHECK-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
404; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
405; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
406; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
407; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
408; CHECK-NEXT:    store i16 0, ptr [[TMP3]], align 1
409; CHECK-NEXT:    store i16 0, ptr [[TMP5]], align 1
410; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
411; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
412; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
413; CHECK-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
414; CHECK:       [[MIDDLE_BLOCK]]:
415; CHECK-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]]
416; CHECK:       [[SCALAR_PH]]:
417; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
418; CHECK-NEXT:    br label %[[FOR_BODY29:.*]]
419; CHECK:       [[FOR_COND_CLEANUP28]]:
420; CHECK-NEXT:    unreachable
421; CHECK:       [[FOR_BODY29]]:
422; CHECK-NEXT:    [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
423; CHECK-NEXT:    [[ADD33:%.*]] = add i16 undef, [[I24_0170]]
424; CHECK-NEXT:    [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
425; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
426; CHECK-NEXT:    store i16 0, ptr [[ARRAYIDX35]], align 1
427; CHECK-NEXT:    [[INC37]] = add i16 [[I24_0170]], 1
428; CHECK-NEXT:    [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
429; CHECK-NEXT:    br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP20:![0-9]+]]
430;
431; PGSO-LABEL: define void @pr43371_pgso(
432; PGSO-SAME: ) !prof [[PROF14]] {
433; PGSO-NEXT:  [[ENTRY:.*]]:
434; PGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
435; PGSO:       [[VECTOR_PH]]:
436; PGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
437; PGSO:       [[VECTOR_BODY]]:
438; PGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
439; PGSO-NEXT:    [[VEC_IND:%.*]] = phi <2 x i16> [ <i16 0, i16 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
440; PGSO-NEXT:    [[TMP0:%.*]] = add <2 x i16> undef, [[VEC_IND]]
441; PGSO-NEXT:    [[TMP1:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
442; PGSO-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
443; PGSO-NEXT:    [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
444; PGSO-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
445; PGSO-NEXT:    [[TMP5:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP4]]
446; PGSO-NEXT:    store i16 0, ptr [[TMP3]], align 1
447; PGSO-NEXT:    store i16 0, ptr [[TMP5]], align 1
448; PGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
449; PGSO-NEXT:    [[VEC_IND_NEXT]] = add <2 x i16> [[VEC_IND]], splat (i16 2)
450; PGSO-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
451; PGSO-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
452; PGSO:       [[MIDDLE_BLOCK]]:
453; PGSO-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]]
454; PGSO:       [[SCALAR_PH]]:
455; PGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
456; PGSO-NEXT:    br label %[[FOR_BODY29:.*]]
457; PGSO:       [[FOR_COND_CLEANUP28]]:
458; PGSO-NEXT:    unreachable
459; PGSO:       [[FOR_BODY29]]:
460; PGSO-NEXT:    [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
461; PGSO-NEXT:    [[ADD33:%.*]] = add i16 undef, [[I24_0170]]
462; PGSO-NEXT:    [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
463; PGSO-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
464; PGSO-NEXT:    store i16 0, ptr [[ARRAYIDX35]], align 1
465; PGSO-NEXT:    [[INC37]] = add i16 [[I24_0170]], 1
466; PGSO-NEXT:    [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
467; PGSO-NEXT:    br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP20:![0-9]+]]
468;
469; NPGSO-LABEL: define void @pr43371_pgso(
470; NPGSO-SAME: ) !prof [[PROF14]] {
471; NPGSO-NEXT:  [[ENTRY:.*]]:
472; NPGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
473; NPGSO:       [[VECTOR_SCEVCHECK]]:
474; NPGSO-NEXT:    br i1 undef, label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
475; NPGSO:       [[VECTOR_PH]]:
476; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
477; NPGSO:       [[VECTOR_BODY]]:
478; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
479; NPGSO-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
480; NPGSO-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
481; NPGSO-NEXT:    [[TMP1:%.*]] = add i16 undef, [[TMP0]]
482; NPGSO-NEXT:    [[TMP2:%.*]] = zext i16 [[TMP1]] to i32
483; NPGSO-NEXT:    [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]]
484; NPGSO-NEXT:    [[TMP4:%.*]] = getelementptr i16, ptr [[TMP3]], i32 0
485; NPGSO-NEXT:    store <2 x i16> zeroinitializer, ptr [[TMP4]], align 1
486; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
487; NPGSO-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756
488; NPGSO-NEXT:    br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
489; NPGSO:       [[MIDDLE_BLOCK]]:
490; NPGSO-NEXT:    br i1 true, label %[[FOR_COND_CLEANUP28:.*]], label %[[SCALAR_PH]]
491; NPGSO:       [[SCALAR_PH]]:
492; NPGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 756, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
493; NPGSO-NEXT:    br label %[[FOR_BODY29:.*]]
494; NPGSO:       [[FOR_COND_CLEANUP28]]:
495; NPGSO-NEXT:    unreachable
496; NPGSO:       [[FOR_BODY29]]:
497; NPGSO-NEXT:    [[I24_0170:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ]
498; NPGSO-NEXT:    [[ADD33:%.*]] = add i16 undef, [[I24_0170]]
499; NPGSO-NEXT:    [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32
500; NPGSO-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]]
501; NPGSO-NEXT:    store i16 0, ptr [[ARRAYIDX35]], align 1
502; NPGSO-NEXT:    [[INC37]] = add i16 [[I24_0170]], 1
503; NPGSO-NEXT:    [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756
504; NPGSO-NEXT:    br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP22:![0-9]+]]
505;
506; We do not want to generate SCEV predicates when optimising for size, because
507; that will lead to extra code generation such as the SCEV overflow runtime
508; checks. Not generating SCEV predicates can still result in vectorisation as
509; the non-consecutive loads/stores can be scalarized:
510entry:
511  br label %for.body29
512
; The loop's only exit leads here, and the block ends in unreachable.
513for.cond.cleanup28:
514  unreachable
515
516for.body29:
517  %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29]
; The i16 sum is computed first and only then widened to i32 for indexing.
518  %add33 = add i16 undef, %i24.0170
519  %idxprom34 = zext i16 %add33 to i32
520  %arrayidx35 = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 %idxprom34
521  store i16 0, ptr %arrayidx35, align 1
522  %inc37 = add i16 %i24.0170, 1
523  %cmp26 = icmp ult i16 %inc37, 756
524  br i1 %cmp26, label %for.body29, label %for.cond.cleanup28
525}
526
527; PR45526: don't vectorize with fold-tail if first-order-recurrence is live-out.
528;
; pr45526: optsize function with two phis in one loop. %piv is the induction
; variable; %for starts at 5 and on every later iteration holds the
; %pivPlus1 computed by the previous one. %for is returned after the loop,
; so it is used outside the loop. The assertions for all three FileCheck
; prefixes expect the loop to remain scalar, with only an LCSSA phi for %for
; added in the exit block.
529define i32 @pr45526() optsize {
530;
531; CHECK-LABEL: define i32 @pr45526(
532; CHECK-SAME: ) #[[ATTR0]] {
533; CHECK-NEXT:  [[ENTRY:.*]]:
534; CHECK-NEXT:    br label %[[LOOP:.*]]
535; CHECK:       [[LOOP]]:
536; CHECK-NEXT:    [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ]
537; CHECK-NEXT:    [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
538; CHECK-NEXT:    [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
539; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[PIV]], 510
540; CHECK-NEXT:    br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
541; CHECK:       [[EXIT]]:
542; CHECK-NEXT:    [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
543; CHECK-NEXT:    ret i32 [[FOR_LCSSA]]
544;
545; PGSO-LABEL: define i32 @pr45526(
546; PGSO-SAME: ) #[[ATTR0]] {
547; PGSO-NEXT:  [[ENTRY:.*]]:
548; PGSO-NEXT:    br label %[[LOOP:.*]]
549; PGSO:       [[LOOP]]:
550; PGSO-NEXT:    [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ]
551; PGSO-NEXT:    [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
552; PGSO-NEXT:    [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
553; PGSO-NEXT:    [[COND:%.*]] = icmp ult i32 [[PIV]], 510
554; PGSO-NEXT:    br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
555; PGSO:       [[EXIT]]:
556; PGSO-NEXT:    [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
557; PGSO-NEXT:    ret i32 [[FOR_LCSSA]]
558;
559; NPGSO-LABEL: define i32 @pr45526(
560; NPGSO-SAME: ) #[[ATTR0]] {
561; NPGSO-NEXT:  [[ENTRY:.*]]:
562; NPGSO-NEXT:    br label %[[LOOP:.*]]
563; NPGSO:       [[LOOP]]:
564; NPGSO-NEXT:    [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ]
565; NPGSO-NEXT:    [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
566; NPGSO-NEXT:    [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
567; NPGSO-NEXT:    [[COND:%.*]] = icmp ult i32 [[PIV]], 510
568; NPGSO-NEXT:    br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
569; NPGSO:       [[EXIT]]:
570; NPGSO-NEXT:    [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
571; NPGSO-NEXT:    ret i32 [[FOR_LCSSA]]
572;
573entry:
574  br label %loop
575
576loop:
577  %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ]
; Lags one iteration behind %pivPlus1; 5 is only seen on the first iteration.
578  %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ]
579  %pivPlus1 = add nuw nsw i32 %piv, 1
580  %cond = icmp ult i32 %piv, 510
581  br i1 %cond, label %loop, label %exit
582
583exit:
; Returning the recurrence value is what makes it live-out of the loop.
584  ret i32 %for
585}
586; Same loop as @pr45526, but the function carries !prof !14 (function_entry_count 0).
587define i32 @pr45526_pgso() !prof !14 {
588; Expected to stay scalar unless -pgso=false is passed, in which case it is vectorized with VF 4.
589; CHECK-LABEL: define i32 @pr45526_pgso(
590; CHECK-SAME: ) !prof [[PROF14]] {
591; CHECK-NEXT:  [[ENTRY:.*]]:
592; CHECK-NEXT:    br label %[[LOOP:.*]]
593; CHECK:       [[LOOP]]:
594; CHECK-NEXT:    [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ]
595; CHECK-NEXT:    [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
596; CHECK-NEXT:    [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
597; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[PIV]], 510
598; CHECK-NEXT:    br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
599; CHECK:       [[EXIT]]:
600; CHECK-NEXT:    [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
601; CHECK-NEXT:    ret i32 [[FOR_LCSSA]]
602;
603; PGSO-LABEL: define i32 @pr45526_pgso(
604; PGSO-SAME: ) !prof [[PROF14]] {
605; PGSO-NEXT:  [[ENTRY:.*]]:
606; PGSO-NEXT:    br label %[[LOOP:.*]]
607; PGSO:       [[LOOP]]:
608; PGSO-NEXT:    [[PIV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ]
609; PGSO-NEXT:    [[FOR:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
610; PGSO-NEXT:    [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
611; PGSO-NEXT:    [[COND:%.*]] = icmp ult i32 [[PIV]], 510
612; PGSO-NEXT:    br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]]
613; PGSO:       [[EXIT]]:
614; PGSO-NEXT:    [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ]
615; PGSO-NEXT:    ret i32 [[FOR_LCSSA]]
616;
617; NPGSO-LABEL: define i32 @pr45526_pgso(
618; NPGSO-SAME: ) !prof [[PROF14]] {
619; NPGSO-NEXT:  [[ENTRY:.*]]:
620; NPGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
621; NPGSO:       [[VECTOR_PH]]:
622; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
623; NPGSO:       [[VECTOR_BODY]]:
624; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
625; NPGSO-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
626; NPGSO-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 5>, %[[VECTOR_PH]] ], [ [[TMP0:%.*]], %[[VECTOR_BODY]] ]
627; NPGSO-NEXT:    [[TMP0]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1)
628; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
629; NPGSO-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
630; NPGSO-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508
631; NPGSO-NEXT:    br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
632; NPGSO:       [[MIDDLE_BLOCK]]:
633; NPGSO-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
634; NPGSO-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
635; NPGSO-NEXT:    br i1 false, label %[[EXIT:.*]], label %[[SCALAR_PH]]
636; NPGSO:       [[SCALAR_PH]]:
637; NPGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 508, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
638; NPGSO-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 5, %[[ENTRY]] ]
639; NPGSO-NEXT:    br label %[[LOOP:.*]]
640; NPGSO:       [[LOOP]]:
641; NPGSO-NEXT:    [[PIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ]
642; NPGSO-NEXT:    [[FOR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ]
643; NPGSO-NEXT:    [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1
644; NPGSO-NEXT:    [[COND:%.*]] = icmp ult i32 [[PIV]], 510
645; NPGSO-NEXT:    br i1 [[COND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP24:![0-9]+]]
646; NPGSO:       [[EXIT]]:
647; NPGSO-NEXT:    [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], %[[MIDDLE_BLOCK]] ]
648; NPGSO-NEXT:    ret i32 [[FOR_LCSSA]]
649;
650entry:
651  br label %loop
652
653loop:
654  %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ]
655  %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ] ; previous iteration's %pivPlus1 (5 on the first iteration)
656  %pivPlus1 = add nuw nsw i32 %piv, 1
657  %cond = icmp ult i32 %piv, 510 ; the body executes for %piv = 0..510, i.e. 511 iterations
658  br i1 %cond, label %loop, label %exit
659
660exit:
661  ret i32 %for ; the result is the recurrence value, not the induction variable
662}
663
664; PR46228: Under optsize with vectorization explicitly enabled (!llvm.loop !15),
665; vectorize without versioning on the stride: no runtime stride==1 check is
666; emitted; %BStride stays symbolic in the vector body, whose stores are predicated.
667; NOTE: Some assertions have been autogenerated by utils/update_test_checks.py
668define void @stride1(ptr noalias %B, i32 %BStride) optsize {
669; CHECK-LABEL: define void @stride1(
670; CHECK-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] {
671; CHECK-NEXT:  [[ENTRY:.*]]:
672; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
673; CHECK:       [[VECTOR_PH]]:
674; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0
675; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
676; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
677; CHECK:       [[VECTOR_BODY]]:
678; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
679; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
680; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024)
681; CHECK-NEXT:    [[TMP0:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
682; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
683; CHECK-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
684; CHECK:       [[PRED_STORE_IF]]:
685; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
686; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]]
687; CHECK-NEXT:    store i16 42, ptr [[TMP4]], align 4
688; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
689; CHECK:       [[PRED_STORE_CONTINUE]]:
690; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
691; CHECK-NEXT:    br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
692; CHECK:       [[PRED_STORE_IF1]]:
693; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
694; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]]
695; CHECK-NEXT:    store i16 42, ptr [[TMP7]], align 4
696; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
697; CHECK:       [[PRED_STORE_CONTINUE2]]:
698; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
699; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
700; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
701; CHECK-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
702; CHECK:       [[MIDDLE_BLOCK]]:
703; CHECK-NEXT:    br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
704; CHECK:       [[SCALAR_PH]]:
705; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
706; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
707; CHECK:       [[FOR_BODY]]:
708; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
709; CHECK-NEXT:    [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
710; CHECK-NEXT:    [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
711; CHECK-NEXT:    store i16 42, ptr [[GEPOFB]], align 4
712; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
713; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
714; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
715; CHECK:       [[FOR_END]]:
716; CHECK-NEXT:    ret void
717;
718; PGSO-LABEL: define void @stride1(
719; PGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] {
720; PGSO-NEXT:  [[ENTRY:.*]]:
721; PGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
722; PGSO:       [[VECTOR_PH]]:
723; PGSO-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0
724; PGSO-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
725; PGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
726; PGSO:       [[VECTOR_BODY]]:
727; PGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
728; PGSO-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
729; PGSO-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024)
730; PGSO-NEXT:    [[TMP1:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
731; PGSO-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
732; PGSO-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
733; PGSO:       [[PRED_STORE_IF]]:
734; PGSO-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
735; PGSO-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]]
736; PGSO-NEXT:    store i16 42, ptr [[TMP4]], align 4
737; PGSO-NEXT:    br label %[[PRED_STORE_CONTINUE]]
738; PGSO:       [[PRED_STORE_CONTINUE]]:
739; PGSO-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
740; PGSO-NEXT:    br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
741; PGSO:       [[PRED_STORE_IF1]]:
742; PGSO-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
743; PGSO-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]]
744; PGSO-NEXT:    store i16 42, ptr [[TMP7]], align 4
745; PGSO-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
746; PGSO:       [[PRED_STORE_CONTINUE2]]:
747; PGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
748; PGSO-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
749; PGSO-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
750; PGSO-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
751; PGSO:       [[MIDDLE_BLOCK]]:
752; PGSO-NEXT:    br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
753; PGSO:       [[SCALAR_PH]]:
754; PGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
755; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
756; PGSO:       [[FOR_BODY]]:
757; PGSO-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
758; PGSO-NEXT:    [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
759; PGSO-NEXT:    [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
760; PGSO-NEXT:    store i16 42, ptr [[GEPOFB]], align 4
761; PGSO-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
762; PGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
763; PGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
764; PGSO:       [[FOR_END]]:
765; PGSO-NEXT:    ret void
766;
767; NPGSO-LABEL: define void @stride1(
768; NPGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) #[[ATTR0]] {
769; NPGSO-NEXT:  [[ENTRY:.*]]:
770; NPGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
771; NPGSO:       [[VECTOR_PH]]:
772; NPGSO-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[BSTRIDE]], i64 0
773; NPGSO-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
774; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
775; NPGSO:       [[VECTOR_BODY]]:
776; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
777; NPGSO-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE2]] ]
778; NPGSO-NEXT:    [[TMP0:%.*]] = icmp ule <2 x i32> [[VEC_IND]], splat (i32 1024)
779; NPGSO-NEXT:    [[TMP1:%.*]] = mul nsw <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
780; NPGSO-NEXT:    [[TMP2:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
781; NPGSO-NEXT:    br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
782; NPGSO:       [[PRED_STORE_IF]]:
783; NPGSO-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
784; NPGSO-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP3]]
785; NPGSO-NEXT:    store i16 42, ptr [[TMP4]], align 4
786; NPGSO-NEXT:    br label %[[PRED_STORE_CONTINUE]]
787; NPGSO:       [[PRED_STORE_CONTINUE]]:
788; NPGSO-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
789; NPGSO-NEXT:    br i1 [[TMP5]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
790; NPGSO:       [[PRED_STORE_IF1]]:
791; NPGSO-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
792; NPGSO-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP6]]
793; NPGSO-NEXT:    store i16 42, ptr [[TMP7]], align 4
794; NPGSO-NEXT:    br label %[[PRED_STORE_CONTINUE2]]
795; NPGSO:       [[PRED_STORE_CONTINUE2]]:
796; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
797; NPGSO-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
798; NPGSO-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026
799; NPGSO-NEXT:    br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
800; NPGSO:       [[MIDDLE_BLOCK]]:
801; NPGSO-NEXT:    br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
802; NPGSO:       [[SCALAR_PH]]:
803; NPGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1026, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
804; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
805; NPGSO:       [[FOR_BODY]]:
806; NPGSO-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
807; NPGSO-NEXT:    [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
808; NPGSO-NEXT:    [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
809; NPGSO-NEXT:    store i16 42, ptr [[GEPOFB]], align 4
810; NPGSO-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
811; NPGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
812; NPGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
813; NPGSO:       [[FOR_END]]:
814; NPGSO-NEXT:    ret void
815;
816
817entry:
818  br label %for.body
819
820for.body:
821  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
822  %mulB = mul nsw i32 %iv, %BStride ; index scales with the runtime stride argument
823  %gepOfB = getelementptr inbounds i16, ptr %B, i32 %mulB
824  store i16 42, ptr %gepOfB, align 4
825  %iv.next = add nuw nsw i32 %iv, 1
826  %exitcond = icmp eq i32 %iv.next, 1025 ; 1025 iterations, not a multiple of the expected VF of 2
827  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 ; !15 carries llvm.loop.vectorize.enable
828
829for.end:
830  ret void
831}
832
833; Vectorize with versioning for unit stride for PGSO and enabled vectorization:
834; the function has !prof !14 (entry count 0) instead of optsize, and the vector loop is guarded by a runtime check that %BStride is 1.
835define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 {
836; CHECK-LABEL: define void @stride1_pgso(
837; CHECK-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] {
838; CHECK-NEXT:  [[ENTRY:.*]]:
839; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
840; CHECK:       [[VECTOR_SCEVCHECK]]:
841; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1
842; CHECK-NEXT:    br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
843; CHECK:       [[VECTOR_PH]]:
844; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
845; CHECK:       [[VECTOR_BODY]]:
846; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
847; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
848; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]]
849; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
850; CHECK-NEXT:    store <2 x i16> splat (i16 42), ptr [[TMP2]], align 4
851; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
852; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
853; CHECK-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
854; CHECK:       [[MIDDLE_BLOCK]]:
855; CHECK-NEXT:    br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
856; CHECK:       [[SCALAR_PH]]:
857; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
858; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
859; CHECK:       [[FOR_BODY]]:
860; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
861; CHECK-NEXT:    [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
862; CHECK-NEXT:    [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
863; CHECK-NEXT:    store i16 42, ptr [[GEPOFB]], align 4
864; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
865; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
866; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
867; CHECK:       [[FOR_END]]:
868; CHECK-NEXT:    ret void
869;
870; PGSO-LABEL: define void @stride1_pgso(
871; PGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] {
872; PGSO-NEXT:  [[ENTRY:.*]]:
873; PGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
874; PGSO:       [[VECTOR_SCEVCHECK]]:
875; PGSO-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1
876; PGSO-NEXT:    br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
877; PGSO:       [[VECTOR_PH]]:
878; PGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
879; PGSO:       [[VECTOR_BODY]]:
880; PGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
881; PGSO-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
882; PGSO-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]]
883; PGSO-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
884; PGSO-NEXT:    store <2 x i16> splat (i16 42), ptr [[TMP2]], align 4
885; PGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
886; PGSO-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
887; PGSO-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
888; PGSO:       [[MIDDLE_BLOCK]]:
889; PGSO-NEXT:    br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
890; PGSO:       [[SCALAR_PH]]:
891; PGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
892; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
893; PGSO:       [[FOR_BODY]]:
894; PGSO-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
895; PGSO-NEXT:    [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
896; PGSO-NEXT:    [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
897; PGSO-NEXT:    store i16 42, ptr [[GEPOFB]], align 4
898; PGSO-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
899; PGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
900; PGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
901; PGSO:       [[FOR_END]]:
902; PGSO-NEXT:    ret void
903;
904; NPGSO-LABEL: define void @stride1_pgso(
905; NPGSO-SAME: ptr noalias [[B:%.*]], i32 [[BSTRIDE:%.*]]) !prof [[PROF14]] {
906; NPGSO-NEXT:  [[ENTRY:.*]]:
907; NPGSO-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
908; NPGSO:       [[VECTOR_SCEVCHECK]]:
909; NPGSO-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[BSTRIDE]], 1
910; NPGSO-NEXT:    br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
911; NPGSO:       [[VECTOR_PH]]:
912; NPGSO-NEXT:    br label %[[VECTOR_BODY:.*]]
913; NPGSO:       [[VECTOR_BODY]]:
914; NPGSO-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
915; NPGSO-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
916; NPGSO-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]]
917; NPGSO-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
918; NPGSO-NEXT:    store <2 x i16> splat (i16 42), ptr [[TMP2]], align 4
919; NPGSO-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
920; NPGSO-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
921; NPGSO-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
922; NPGSO:       [[MIDDLE_BLOCK]]:
923; NPGSO-NEXT:    br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
924; NPGSO:       [[SCALAR_PH]]:
925; NPGSO-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ENTRY]] ]
926; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
927; NPGSO:       [[FOR_BODY]]:
928; NPGSO-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
929; NPGSO-NEXT:    [[MULB:%.*]] = mul nsw i32 [[IV]], [[BSTRIDE]]
930; NPGSO-NEXT:    [[GEPOFB:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[MULB]]
931; NPGSO-NEXT:    store i16 42, ptr [[GEPOFB]], align 4
932; NPGSO-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
933; NPGSO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025
934; NPGSO-NEXT:    br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
935; NPGSO:       [[FOR_END]]:
936; NPGSO-NEXT:    ret void
937;
938
939entry:
940  br label %for.body
941
942for.body:
943  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
944  %mulB = mul nsw i32 %iv, %BStride ; the expected vector body drops this multiply once %BStride is checked to be 1
945  %gepOfB = getelementptr inbounds i16, ptr %B, i32 %mulB
946  store i16 42, ptr %gepOfB, align 4
947  %iv.next = add nuw nsw i32 %iv, 1
948  %exitcond = icmp eq i32 %iv.next, 1025 ; 1025 iterations: 1024 in the vector loop, the last one in the scalar loop
949  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 ; !15 carries llvm.loop.vectorize.enable
950
951for.end:
952  ret void
953}
954
955; PR46652: Check that the need for a stride==1 runtime check prevents vectorizing a
956; loop with a tiny trip count (15 iterations here) when compiling w/o -Os/-Oz.
957
958@g = external global [1 x i16], align 1
959
960define void @pr46652(i16 %stride) {
961; CHECK-LABEL: define void @pr46652(
962; CHECK-SAME: i16 [[STRIDE:%.*]]) {
963; CHECK-NEXT:  [[ENTRY:.*]]:
964; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
965; CHECK:       [[FOR_BODY]]:
966; CHECK-NEXT:    [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ]
967; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]]
968; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]]
969; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1
970; CHECK-NEXT:    [[INC9]] = add nuw nsw i16 [[L1_02]], 1
971; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16
972; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
973; CHECK:       [[FOR_END]]:
974; CHECK-NEXT:    ret void
975;
976; PGSO-LABEL: define void @pr46652(
977; PGSO-SAME: i16 [[STRIDE:%.*]]) {
978; PGSO-NEXT:  [[ENTRY:.*]]:
979; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
980; PGSO:       [[FOR_BODY]]:
981; PGSO-NEXT:    [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ]
982; PGSO-NEXT:    [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]]
983; PGSO-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]]
984; PGSO-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1
985; PGSO-NEXT:    [[INC9]] = add nuw nsw i16 [[L1_02]], 1
986; PGSO-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16
987; PGSO-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
988; PGSO:       [[FOR_END]]:
989; PGSO-NEXT:    ret void
990;
991; NPGSO-LABEL: define void @pr46652(
992; NPGSO-SAME: i16 [[STRIDE:%.*]]) {
993; NPGSO-NEXT:  [[ENTRY:.*]]:
994; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
995; NPGSO:       [[FOR_BODY]]:
996; NPGSO-NEXT:    [[L1_02:%.*]] = phi i16 [ 1, %[[ENTRY]] ], [ [[INC9:%.*]], %[[FOR_BODY]] ]
997; NPGSO-NEXT:    [[MUL:%.*]] = mul nsw i16 [[L1_02]], [[STRIDE]]
998; NPGSO-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 [[MUL]]
999; NPGSO-NEXT:    [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX6]], align 1
1000; NPGSO-NEXT:    [[INC9]] = add nuw nsw i16 [[L1_02]], 1
1001; NPGSO-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i16 [[INC9]], 16
1002; NPGSO-NEXT:    br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]]
1003; NPGSO:       [[FOR_END]]:
1004; NPGSO-NEXT:    ret void
1005;
1006entry:
1007  br label %for.body
1008
1009for.body:                                        ; preds = %for.body, %entry
1010  %l1.02 = phi i16 [ 1, %entry ], [ %inc9, %for.body ]
1011  %mul = mul nsw i16 %l1.02, %stride ; index scales with the runtime %stride argument
1012  %arrayidx6 = getelementptr inbounds [1 x i16], ptr @g, i16 0, i16 %mul
1013  %0 = load i16, ptr %arrayidx6, align 1
1014  %inc9 = add nuw nsw i16 %l1.02, 1
1015  %exitcond.not = icmp eq i16 %inc9, 16 ; body runs for %l1.02 = 1..15
1016  br i1 %exitcond.not, label %for.end, label %for.body
1017
1018for.end:                                        ; preds = %for.body
1019  ret void
1020}
1021
1022; Make sure we do not crash while building the VPlan for the loop with the
1023; select below (%cond, which feeds both the header phi and the exit phi). The loop is expected to stay scalar.
1024define i32 @PR48142(ptr %ptr.start, ptr %ptr.end) optsize {
1025; CHECK-LABEL: define i32 @PR48142(
1026; CHECK-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] {
1027; CHECK-NEXT:  [[ENTRY:.*]]:
1028; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
1029; CHECK:       [[FOR_BODY]]:
1030; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1031; CHECK-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ]
1032; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99
1033; CHECK-NEXT:    [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]]
1034; CHECK-NEXT:    store i32 0, ptr [[PTR_IV]], align 4
1035; CHECK-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
1036; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]]
1037; CHECK-NEXT:    br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1038; CHECK:       [[EXIT]]:
1039; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
1040; CHECK-NEXT:    ret i32 [[RES]]
1041;
1042; PGSO-LABEL: define i32 @PR48142(
1043; PGSO-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] {
1044; PGSO-NEXT:  [[ENTRY:.*]]:
1045; PGSO-NEXT:    br label %[[FOR_BODY:.*]]
1046; PGSO:       [[FOR_BODY]]:
1047; PGSO-NEXT:    [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1048; PGSO-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ]
1049; PGSO-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99
1050; PGSO-NEXT:    [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]]
1051; PGSO-NEXT:    store i32 0, ptr [[PTR_IV]], align 4
1052; PGSO-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
1053; PGSO-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]]
1054; PGSO-NEXT:    br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1055; PGSO:       [[EXIT]]:
1056; PGSO-NEXT:    [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
1057; PGSO-NEXT:    ret i32 [[RES]]
1058;
1059; NPGSO-LABEL: define i32 @PR48142(
1060; NPGSO-SAME: ptr [[PTR_START:%.*]], ptr [[PTR_END:%.*]]) #[[ATTR0]] {
1061; NPGSO-NEXT:  [[ENTRY:.*]]:
1062; NPGSO-NEXT:    br label %[[FOR_BODY:.*]]
1063; NPGSO:       [[FOR_BODY]]:
1064; NPGSO-NEXT:    [[I_014:%.*]] = phi i32 [ 20, %[[ENTRY]] ], [ [[COND:%.*]], %[[FOR_BODY]] ]
1065; NPGSO-NEXT:    [[PTR_IV:%.*]] = phi ptr [ [[PTR_START]], %[[ENTRY]] ], [ [[PTR_NEXT:%.*]], %[[FOR_BODY]] ]
1066; NPGSO-NEXT:    [[CMP4:%.*]] = icmp slt i32 [[I_014]], 99
1067; NPGSO-NEXT:    [[COND]] = select i1 [[CMP4]], i32 99, i32 [[I_014]]
1068; NPGSO-NEXT:    store i32 0, ptr [[PTR_IV]], align 4
1069; NPGSO-NEXT:    [[PTR_NEXT]] = getelementptr inbounds i32, ptr [[PTR_IV]], i64 1
1070; NPGSO-NEXT:    [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_NEXT]], [[PTR_END]]
1071; NPGSO-NEXT:    br i1 [[CMP_NOT]], label %[[EXIT:.*]], label %[[FOR_BODY]]
1072; NPGSO:       [[EXIT]]:
1073; NPGSO-NEXT:    [[RES:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ]
1074; NPGSO-NEXT:    ret i32 [[RES]]
1075;
1076entry:
1077  br label %for.body
1078
1079for.body:
1080  %i.014 = phi i32 [ 20, %entry ], [ %cond, %for.body ]
1081  %ptr.iv = phi ptr [ %ptr.start, %entry ], [ %ptr.next, %for.body ]
1082  %cmp4 = icmp slt i32 %i.014, 99
1083  %cond = select i1 %cmp4, i32 99, i32 %i.014 ; 99 when %i.014 < 99, otherwise %i.014 (a signed max with 99)
1084  store i32 0, ptr %ptr.iv
1085  %ptr.next = getelementptr inbounds i32, ptr %ptr.iv, i64 1
1086  %cmp.not = icmp eq ptr %ptr.next, %ptr.end ; pointer-based exit: stop once %ptr.next reaches %ptr.end
1087  br i1 %cmp.not, label %exit, label %for.body
1088
1089exit:
1090  %res = phi i32 [ %cond, %for.body ]
1091  ret i32 %res
1092}
1093; Profile summary module flag (!0-!13), the zero entry count attached via !prof to the *_pgso functions (!14), and the loop hint used by the stride1 tests (!15/!16).
1094!llvm.module.flags = !{!0}
1095!0 = !{i32 1, !"ProfileSummary", !1}
1096!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
1097!2 = !{!"ProfileFormat", !"InstrProf"}
1098!3 = !{!"TotalCount", i64 10000}
1099!4 = !{!"MaxCount", i64 10}
1100!5 = !{!"MaxInternalCount", i64 1}
1101!6 = !{!"MaxFunctionCount", i64 1000}
1102!7 = !{!"NumCounts", i64 3}
1103!8 = !{!"NumFunctions", i64 3}
1104!9 = !{!"DetailedSummary", !10}
1105!10 = !{!11, !12, !13}
1106!11 = !{i32 10000, i64 100, i32 1}
1107!12 = !{i32 999000, i64 100, i32 1}
1108!13 = !{i32 999999, i64 1, i32 2}
1109!14 = !{!"function_entry_count", i64 0}
1110!15 = distinct !{!15, !16}
1111!16 = !{!"llvm.loop.vectorize.enable", i1 true}
1112;.
1113; CHECK: [[PROF14]] = !{!"function_entry_count", i64 0}
1114; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
1115; CHECK: [[META16]] = !{!"llvm.loop.isvectorized", i32 1}
1116; CHECK: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"}
1117; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]}
1118; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
1119; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META17]], [[META16]]}
1120; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]}
1121; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META16]]}
1122; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]}
1123; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]]}
1124;.
1125; PGSO: [[PROF14]] = !{!"function_entry_count", i64 0}
1126; PGSO: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
1127; PGSO: [[META16]] = !{!"llvm.loop.isvectorized", i32 1}
1128; PGSO: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"}
1129; PGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]}
1130; PGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
1131; PGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META17]], [[META16]]}
1132; PGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]}
1133; PGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]], [[META16]]}
1134; PGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]}
1135; PGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]]}
1136;.
1137; NPGSO: [[PROF14]] = !{!"function_entry_count", i64 0}
1138; NPGSO: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
1139; NPGSO: [[META16]] = !{!"llvm.loop.isvectorized", i32 1}
1140; NPGSO: [[META17]] = !{!"llvm.loop.unroll.runtime.disable"}
1141; NPGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]}
1142; NPGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]}
1143; NPGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META17]], [[META16]]}
1144; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]], [[META17]]}
1145; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]]}
1146; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]}
1147; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META17]], [[META16]]}
1148; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]}
1149; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META17]], [[META16]]}
1150; NPGSO: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]], [[META17]]}
1151; NPGSO: [[LOOP28]] = distinct !{[[LOOP28]], [[META16]]}
1152;.
1153