xref: /llvm-project/llvm/test/Transforms/IndVarSimplify/ARM/code-size.ll (revision 3ce360f15b5a790a5fd9dcab716bbed7b4d3a347)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mtriple=thumbv8m.main -passes=indvars -S < %s | FileCheck %s --check-prefix=CHECK-V8M
3; RUN: opt -mtriple=thumbv8a -passes=indvars -S < %s | FileCheck %s --check-prefix=CHECK-V8A
4
; remove_loop: starting from %size, keeps subtracting 32 while the running
; value is unsigned-greater than 31, then returns the value that failed that
; comparison.
; The assertions below expect indvars to compute the returned value outside the
; loop (add/umin/sub/lshr/shl in entry, final sub in while.end) and to fold the
; back-edge condition to `br i1 false`. The V8M and V8A expectations are the same.
; Attribute group #0 is defined outside this excerpt.
; NOTE(review): the decimal number fused onto the start of every line (e.g.
; `5define`) looks like line-number residue from the xref export rather than
; IR; confirm and strip it before feeding this file to opt.
5define i32 @remove_loop(i32 %size) #0 {
6; CHECK-V8M-LABEL: @remove_loop(
7; CHECK-V8M-NEXT:  entry:
8; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
9; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
10; CHECK-V8M-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
11; CHECK-V8M-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
12; CHECK-V8M-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
13; CHECK-V8M-NEXT:    br label [[WHILE_COND:%.*]]
14; CHECK-V8M:       while.cond:
15; CHECK-V8M-NEXT:    br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
16; CHECK-V8M:       while.end:
17; CHECK-V8M-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
18; CHECK-V8M-NEXT:    ret i32 [[TMP4]]
19;
20; CHECK-V8A-LABEL: @remove_loop(
21; CHECK-V8A-NEXT:  entry:
22; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[SIZE:%.*]], 31
23; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SIZE]], i32 31)
24; CHECK-V8A-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[UMIN]]
25; CHECK-V8A-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], 5
26; CHECK-V8A-NEXT:    [[TMP3:%.*]] = shl nuw i32 [[TMP2]], 5
27; CHECK-V8A-NEXT:    br label [[WHILE_COND:%.*]]
28; CHECK-V8A:       while.cond:
29; CHECK-V8A-NEXT:    br i1 false, label [[WHILE_COND]], label [[WHILE_END:%.*]]
30; CHECK-V8A:       while.end:
31; CHECK-V8A-NEXT:    [[TMP4:%.*]] = sub i32 [[SIZE]], [[TMP3]]
32; CHECK-V8A-NEXT:    ret i32 [[TMP4]]
33;
34entry:
35  br label %while.cond
36
; Single-block loop: the header is also the latch and the only exiting block.
37while.cond:                                       ; preds = %while.cond, %entry
38  %size.addr.0 = phi i32 [ %size, %entry ], [ %sub, %while.cond ]
  ; Stay in the loop while the current value is still above 31 (unsigned).
39  %cmp = icmp ugt i32 %size.addr.0, 31
  ; Next value is the current one minus 32; only used on the back edge.
40  %sub = add i32 %size.addr.0, -32
41  br i1 %cmp, label %while.cond, label %while.end
42
43while.end:                                        ; preds = %while.cond
  ; Single-entry phi that forwards the loop's final value (the pre-subtraction
  ; %size.addr.0) to the return.
44  %size.lcssa = phi i32 [ %size.addr.0, %while.cond ]
45  ret i32 %size.lcssa
46}
47
; expandOuterRecurrence: a two-level loop nest with no memory accesses or calls.
; The outer loop counts %i up from 0 while %i.inc is signed-less-than %arg - 1.
; The inner loop counts %j up from 0 while %j.inc is signed-less-than
; %arg - %i - 1.
; The assertions below expect indvars to fold both back-edge conditions to
; `br i1 false`, drop the inner %j / %j.inc / %cmp3 computation, add
; `nuw nsw` to %i.inc, and produce the extra outer.preheader,
; outer.inc.loopexit and exit.loopexit blocks. The V8M and V8A expectations
; are the same.
; Attribute group #0 is defined outside this excerpt.
; NOTE(review): the decimal number fused onto the start of every line (e.g.
; `48define`) looks like line-number residue from the xref export rather than
; IR; confirm and strip it before feeding this file to opt.
48define void @expandOuterRecurrence(i32 %arg) nounwind #0 {
49; CHECK-V8M-LABEL: @expandOuterRecurrence(
50; CHECK-V8M-NEXT:  entry:
51; CHECK-V8M-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
52; CHECK-V8M-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
53; CHECK-V8M-NEXT:    br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
54; CHECK-V8M:       outer.preheader:
55; CHECK-V8M-NEXT:    br label [[OUTER:%.*]]
56; CHECK-V8M:       outer:
57; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
58; CHECK-V8M-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
59; CHECK-V8M-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
60; CHECK-V8M-NEXT:    [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
61; CHECK-V8M-NEXT:    br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]]
62; CHECK-V8M:       inner.ph:
63; CHECK-V8M-NEXT:    br label [[INNER:%.*]]
64; CHECK-V8M:       inner:
65; CHECK-V8M-NEXT:    br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
66; CHECK-V8M:       outer.inc.loopexit:
67; CHECK-V8M-NEXT:    br label [[OUTER_INC]]
68; CHECK-V8M:       outer.inc:
69; CHECK-V8M-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
70; CHECK-V8M-NEXT:    br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
71; CHECK-V8M:       exit.loopexit:
72; CHECK-V8M-NEXT:    br label [[EXIT]]
73; CHECK-V8M:       exit:
74; CHECK-V8M-NEXT:    ret void
75;
76; CHECK-V8A-LABEL: @expandOuterRecurrence(
77; CHECK-V8A-NEXT:  entry:
78; CHECK-V8A-NEXT:    [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1
79; CHECK-V8A-NEXT:    [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]]
80; CHECK-V8A-NEXT:    br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]]
81; CHECK-V8A:       outer.preheader:
82; CHECK-V8A-NEXT:    br label [[OUTER:%.*]]
83; CHECK-V8A:       outer:
84; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC:%.*]] ], [ 0, [[OUTER_PREHEADER]] ]
85; CHECK-V8A-NEXT:    [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]]
86; CHECK-V8A-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1
87; CHECK-V8A-NEXT:    [[CMP2:%.*]] = icmp slt i32 0, [[SUB3]]
88; CHECK-V8A-NEXT:    br i1 [[CMP2]], label [[INNER_PH:%.*]], label [[OUTER_INC]]
89; CHECK-V8A:       inner.ph:
90; CHECK-V8A-NEXT:    br label [[INNER:%.*]]
91; CHECK-V8A:       inner:
92; CHECK-V8A-NEXT:    br i1 false, label [[INNER]], label [[OUTER_INC_LOOPEXIT:%.*]]
93; CHECK-V8A:       outer.inc.loopexit:
94; CHECK-V8A-NEXT:    br label [[OUTER_INC]]
95; CHECK-V8A:       outer.inc:
96; CHECK-V8A-NEXT:    [[I_INC]] = add nuw nsw i32 [[I]], 1
97; CHECK-V8A-NEXT:    br i1 false, label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]]
98; CHECK-V8A:       exit.loopexit:
99; CHECK-V8A-NEXT:    br label [[EXIT]]
100; CHECK-V8A:       exit:
101; CHECK-V8A-NEXT:    ret void
102;
103entry:
  ; Guard: only enter the outer loop when 0 < %arg - 1 (signed).
104  %sub1 = sub nsw i32 %arg, 1
105  %cmp1 = icmp slt i32 0, %sub1
106  br i1 %cmp1, label %outer, label %exit
107
; Outer loop header: %i is the outer induction variable.
108outer:
109  %i = phi i32 [ 0, %entry ], [ %i.inc, %outer.inc ]
  ; %sub3 = %arg - %i - 1 is the inner loop's bound; it depends on the outer
  ; induction variable.
110  %sub2 = sub nsw i32 %arg, %i
111  %sub3 = sub nsw i32 %sub2, 1
  ; Guard: skip the inner loop when its bound is not positive.
112  %cmp2 = icmp slt i32 0, %sub3
113  br i1 %cmp2, label %inner.ph, label %outer.inc
114
115inner.ph:
116  br label %inner
117
; Inner loop: single block, %j counts up from 0 until %j.inc reaches %sub3.
118inner:
119  %j = phi i32 [ 0, %inner.ph ], [ %j.inc, %inner ]
120  %j.inc = add nsw i32 %j, 1
121  %cmp3 = icmp slt i32 %j.inc, %sub3
122  br i1 %cmp3, label %inner, label %outer.inc
123
; Outer latch: advance %i and test it against %sub1 = %arg - 1.
124outer.inc:
125  %i.inc = add nsw i32 %i, 1
126  %cmp4 = icmp slt i32 %i.inc, %sub1
127  br i1 %cmp4, label %outer, label %exit
128
129exit:
130  ret void
131}
132
; test1: sums %array[0], %array[1], ... with a per-iteration range check.
; Each iteration first tests %i ult %length; on failure it calls
; @prevent_merging and returns -1. Otherwise it adds the loaded element to the
; accumulator and continues while %i + 1 ult %n. The accumulated sum is
; returned on normal exit.
; The assertions below expect indvars to replace the in-loop range check with a
; loop-invariant condition computed in the preheader:
;   icmp ne %length, umin(freeze(umax(%n, 1) - 1), %length)
; The V8M and V8A expectations are the same.
; @prevent_merging, attribute group #0 and metadata !0 are defined outside
; this excerpt.
; NOTE(review): the decimal number fused onto the start of every line (e.g.
; `133define`) looks like line-number residue from the xref export rather than
; IR; confirm and strip it before feeding this file to opt.
133define i32 @test1(ptr %array, i32 %length, i32 %n) #0 {
134; CHECK-V8M-LABEL: @test1(
135; CHECK-V8M-NEXT:  loop.preheader:
136; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
137; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
138; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
139; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
140; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
141; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
142; CHECK-V8M:       loop:
143; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
144; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
145; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]]
146; CHECK-V8M:       deopt:
147; CHECK-V8M-NEXT:    call void @prevent_merging()
148; CHECK-V8M-NEXT:    ret i32 -1
149; CHECK-V8M:       guarded:
150; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
151; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
152; CHECK-V8M-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
153; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
154; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
155; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
156; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
157; CHECK-V8M:       exit:
158; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
159; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
160;
161; CHECK-V8A-LABEL: @test1(
162; CHECK-V8A-NEXT:  loop.preheader:
163; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
164; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
165; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
166; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
167; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
168; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
169; CHECK-V8A:       loop:
170; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
171; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
172; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]]
173; CHECK-V8A:       deopt:
174; CHECK-V8A-NEXT:    call void @prevent_merging()
175; CHECK-V8A-NEXT:    ret i32 -1
176; CHECK-V8A:       guarded:
177; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
178; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
179; CHECK-V8A-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
180; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
181; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
182; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
183; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
184; CHECK-V8A:       exit:
185; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
186; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
187;
188loop.preheader:                                   ; first block of the function (no predecessors)
189  br label %loop
190
191loop:                                             ; preds = %guarded, %loop.preheader
192  %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ]
193  %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ]
  ; Range check: the index must be unsigned-less-than %length.
194  %within.bounds = icmp ult i32 %i, %length
195  br i1 %within.bounds, label %guarded, label %deopt, !prof !0
196
; Failed range check: call @prevent_merging and return -1.
197deopt:                                            ; preds = %loop
198  call void @prevent_merging()
199  ret i32 -1
200
201guarded:                                          ; preds = %loop
  ; Load %array[%i] and add it to the running sum.
202  %i.i64 = zext i32 %i to i64
203  %array.i.ptr = getelementptr inbounds i32, ptr %array, i64 %i.i64
204  %array.i = load i32, ptr %array.i.ptr, align 4
205  %loop.acc.next = add i32 %loop.acc, %array.i
  ; Latch: keep looping while the next index is unsigned-less-than %n.
206  %i.next = add nuw i32 %i, 1
207  %continue = icmp ult i32 %i.next, %n
208  br i1 %continue, label %loop, label %exit
209
210exit:                                             ; preds = %guarded
211  %result = phi i32 [ %loop.acc.next, %guarded ]
212  ret i32 %result
213}
214
; External declaration of @maythrow.
; NOTE(review): none of the functions visible in this excerpt call @maythrow
; (they call @prevent_merging instead); confirm against the rest of the file
; whether this declaration is still needed.
215declare void @maythrow()
216
; test2: same loop shape as a range-checked array sum, but both comparisons
; use `icmp ne` instead of an unsigned less-than. The range check is
; %i ne %length and the latch condition is %i + 1 ne %n. A failed range check
; calls @prevent_merging and returns -1; the accumulated sum is returned on
; normal exit.
; The assertions below expect indvars to replace the in-loop range check with a
; loop-invariant condition computed in the preheader:
;   icmp ne %length, umin(freeze(%n - 1), %length)
; The latch compare is expected to stay `icmp ne`. The V8M and V8A
; expectations are the same.
; @prevent_merging, attribute group #0 and metadata !0 are defined outside
; this excerpt.
; NOTE(review): the decimal number fused onto the start of every line (e.g.
; `217define`) looks like line-number residue from the xref export rather than
; IR; confirm and strip it before feeding this file to opt.
217define i32 @test2(ptr %array, i32 %length, i32 %n) #0 {
218; CHECK-V8M-LABEL: @test2(
219; CHECK-V8M-NEXT:  loop.preheader:
220; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
221; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
222; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
223; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
224; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
225; CHECK-V8M:       loop:
226; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
227; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
228; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
229; CHECK-V8M:       deopt:
230; CHECK-V8M-NEXT:    call void @prevent_merging()
231; CHECK-V8M-NEXT:    ret i32 -1
232; CHECK-V8M:       guarded:
233; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
234; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
235; CHECK-V8M-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
236; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
237; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
238; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
239; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
240; CHECK-V8M:       exit:
241; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
242; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
243;
244; CHECK-V8A-LABEL: @test2(
245; CHECK-V8A-NEXT:  loop.preheader:
246; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
247; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
248; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
249; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
250; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
251; CHECK-V8A:       loop:
252; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
253; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
254; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
255; CHECK-V8A:       deopt:
256; CHECK-V8A-NEXT:    call void @prevent_merging()
257; CHECK-V8A-NEXT:    ret i32 -1
258; CHECK-V8A:       guarded:
259; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
260; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]]
261; CHECK-V8A-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
262; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
263; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
264; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ne i32 [[I_NEXT]], [[N]]
265; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
266; CHECK-V8A:       exit:
267; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
268; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
269;
270loop.preheader:                                   ; first block of the function (no predecessors)
271  br label %loop
272
273loop:                                             ; preds = %guarded, %loop.preheader
274  %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ]
275  %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ]
  ; Range check expressed as an inequality: fail once %i equals %length.
276  %within.bounds = icmp ne i32 %i, %length
277  br i1 %within.bounds, label %guarded, label %deopt, !prof !0
278
; Failed range check: call @prevent_merging and return -1.
279deopt:                                            ; preds = %loop
280  call void @prevent_merging()
281  ret i32 -1
282
283guarded:                                          ; preds = %loop
  ; Load %array[%i] and add it to the running sum.
284  %i.i64 = zext i32 %i to i64
285  %array.i.ptr = getelementptr inbounds i32, ptr %array, i64 %i.i64
286  %array.i = load i32, ptr %array.i.ptr, align 4
287  %loop.acc.next = add i32 %loop.acc, %array.i
  ; Latch: keep looping until the next index equals %n.
288  %i.next = add nuw i32 %i, 1
289  %continue = icmp ne i32 %i.next, %n
290  br i1 %continue, label %loop, label %exit
291
292exit:                                             ; preds = %guarded
293  %result = phi i32 [ %loop.acc.next, %guarded ]
294  ret i32 %result
295}
296
; two_range_checks: sums %array.1[%i] + %array.2[%i] over the loop, guarded by
; one branch whose condition is the `and` of two range checks
; (%i ult %length.1 and %i ult %length.2). A failed check calls
; @prevent_merging and returns -1; the loop continues while %i + 1 ult %n and
; returns the accumulated sum on normal exit.
; The assertions below expect indvars to replace the combined in-loop check
; with a loop-invariant condition computed in the preheader, with
; L = umin(%length.2, %length.1):
;   icmp ne L, umin(freeze(umax(%n, 1) - 1), L)
; The V8M and V8A expectations are the same.
; @prevent_merging, attribute group #0 and metadata !0 are defined outside
; this excerpt.
; NOTE(review): the decimal number fused onto the start of every line (e.g.
; `297define`) looks like line-number residue from the xref export rather than
; IR; confirm and strip it before feeding this file to opt.
297define i32 @two_range_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %length.2, i32 %n) #0 {
298; CHECK-V8M-LABEL: @two_range_checks(
299; CHECK-V8M-NEXT:  loop.preheader:
300; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]])
301; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
302; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
303; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
304; CHECK-V8M-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN]])
305; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[UMIN]], [[UMIN1]]
306; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
307; CHECK-V8M:       loop:
308; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
309; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
310; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
311; CHECK-V8M:       deopt:
312; CHECK-V8M-NEXT:    call void @prevent_merging()
313; CHECK-V8M-NEXT:    ret i32 -1
314; CHECK-V8M:       guarded:
315; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
316; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
317; CHECK-V8M-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
318; CHECK-V8M-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
319; CHECK-V8M-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2:%.*]], i64 [[I_I64]]
320; CHECK-V8M-NEXT:    [[ARRAY_2_I:%.*]] = load i32, ptr [[ARRAY_2_I_PTR]], align 4
321; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]]
322; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
323; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
324; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
325; CHECK-V8M:       exit:
326; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
327; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
328;
329; CHECK-V8A-LABEL: @two_range_checks(
330; CHECK-V8A-NEXT:  loop.preheader:
331; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_2:%.*]], i32 [[LENGTH_1:%.*]])
332; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
333; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
334; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
335; CHECK-V8A-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN]])
336; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[UMIN]], [[UMIN1]]
337; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
338; CHECK-V8A:       loop:
339; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
340; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
341; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
342; CHECK-V8A:       deopt:
343; CHECK-V8A-NEXT:    call void @prevent_merging()
344; CHECK-V8A-NEXT:    ret i32 -1
345; CHECK-V8A:       guarded:
346; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
347; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
348; CHECK-V8A-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
349; CHECK-V8A-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
350; CHECK-V8A-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2:%.*]], i64 [[I_I64]]
351; CHECK-V8A-NEXT:    [[ARRAY_2_I:%.*]] = load i32, ptr [[ARRAY_2_I_PTR]], align 4
352; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]]
353; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
354; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
355; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
356; CHECK-V8A:       exit:
357; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
358; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
359;
360loop.preheader:                                   ; first block of the function (no predecessors)
361  br label %loop
362
363loop:                                             ; preds = %guarded, %loop.preheader
364  %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ]
365  %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ]
  ; Two range checks on the same index, combined into a single branch.
366  %within.bounds.1 = icmp ult i32 %i, %length.1
367  %within.bounds.2 = icmp ult i32 %i, %length.2
368  %within.bounds = and i1 %within.bounds.1, %within.bounds.2
369  br i1 %within.bounds, label %guarded, label %deopt, !prof !0
370
; Failed range check: call @prevent_merging and return -1.
371deopt:                                            ; preds = %loop
372  call void @prevent_merging()
373  ret i32 -1
374
375guarded:                                          ; preds = %loop
  ; Both arrays are read at the same zero-extended index.
376  %i.i64 = zext i32 %i to i64
377  %array.1.i.ptr = getelementptr inbounds i32, ptr %array.1, i64 %i.i64
378  %array.1.i = load i32, ptr %array.1.i.ptr, align 4
379  %loop.acc.1 = add i32 %loop.acc, %array.1.i
380  %array.2.i.ptr = getelementptr inbounds i32, ptr %array.2, i64 %i.i64
381  %array.2.i = load i32, ptr %array.2.i.ptr, align 4
382  %loop.acc.next = add i32 %loop.acc.1, %array.2.i
  ; Latch: keep looping while the next index is unsigned-less-than %n.
383  %i.next = add nuw i32 %i, 1
384  %continue = icmp ult i32 %i.next, %n
385  br i1 %continue, label %loop, label %exit
386
387exit:                                             ; preds = %guarded
388  %result = phi i32 [ %loop.acc.next, %guarded ]
389  ret i32 %result
390}
391
; three_range_checks: sums %array.1[%i] + %array.2[%i] + %array.3[%i] over the
; loop, guarded by one branch whose condition is the `and` of three range
; checks (%i ult %length.1, %i ult %length.2, %i ult %length.3). A failed
; check calls @prevent_merging and returns -1; the loop continues while
; %i + 1 ult %n and returns the accumulated sum on normal exit.
; The assertions below expect indvars to replace the combined in-loop check
; with a loop-invariant condition computed in the preheader, with
; L = umin(umin(%length.3, %length.2), %length.1):
;   icmp ne L, umin(freeze(umax(%n, 1) - 1), L)
; The V8M and V8A expectations are the same.
; @prevent_merging, attribute group #0 and metadata !0 are defined outside
; this excerpt.
; NOTE(review): the decimal number fused onto the start of every line (e.g.
; `392define`) looks like line-number residue from the xref export rather than
; IR; confirm and strip it before feeding this file to opt.
392define i32 @three_range_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %length.2, ptr %array.3, i32 %length.3, i32 %n) #0 {
393; CHECK-V8M-LABEL: @three_range_checks(
394; CHECK-V8M-NEXT:  loop.preheader:
395; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]])
396; CHECK-V8M-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]])
397; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
398; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
399; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
400; CHECK-V8M-NEXT:    [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN1]])
401; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN2]]
402; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
403; CHECK-V8M:       loop:
404; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
405; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
406; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
407; CHECK-V8M:       deopt:
408; CHECK-V8M-NEXT:    call void @prevent_merging()
409; CHECK-V8M-NEXT:    ret i32 -1
410; CHECK-V8M:       guarded:
411; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
412; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
413; CHECK-V8M-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
414; CHECK-V8M-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
415; CHECK-V8M-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2:%.*]], i64 [[I_I64]]
416; CHECK-V8M-NEXT:    [[ARRAY_2_I:%.*]] = load i32, ptr [[ARRAY_2_I_PTR]], align 4
417; CHECK-V8M-NEXT:    [[LOOP_ACC_2:%.*]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]]
418; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
419; CHECK-V8M-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
420; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_2]], [[ARRAY_3_I]]
421; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
422; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
423; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
424; CHECK-V8M:       exit:
425; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
426; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
427;
428; CHECK-V8A-LABEL: @three_range_checks(
429; CHECK-V8A-NEXT:  loop.preheader:
430; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[LENGTH_3:%.*]], i32 [[LENGTH_2:%.*]])
431; CHECK-V8A-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]])
432; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
433; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
434; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
435; CHECK-V8A-NEXT:    [[UMIN2:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[UMIN1]])
436; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[UMIN1]], [[UMIN2]]
437; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
438; CHECK-V8A:       loop:
439; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
440; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
441; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
442; CHECK-V8A:       deopt:
443; CHECK-V8A-NEXT:    call void @prevent_merging()
444; CHECK-V8A-NEXT:    ret i32 -1
445; CHECK-V8A:       guarded:
446; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
447; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
448; CHECK-V8A-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
449; CHECK-V8A-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
450; CHECK-V8A-NEXT:    [[ARRAY_2_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_2:%.*]], i64 [[I_I64]]
451; CHECK-V8A-NEXT:    [[ARRAY_2_I:%.*]] = load i32, ptr [[ARRAY_2_I_PTR]], align 4
452; CHECK-V8A-NEXT:    [[LOOP_ACC_2:%.*]] = add i32 [[LOOP_ACC_1]], [[ARRAY_2_I]]
453; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
454; CHECK-V8A-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
455; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_2]], [[ARRAY_3_I]]
456; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
457; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
458; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
459; CHECK-V8A:       exit:
460; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
461; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
462;
463loop.preheader:                                   ; first block of the function (no predecessors)
464  br label %loop
465
466loop:                                             ; preds = %guarded, %loop.preheader
467  %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ]
468  %i = phi i32 [ %i.next, %guarded ], [ 0, %loop.preheader ]
  ; Three range checks on the same index, and-ed pairwise into one branch.
469  %within.bounds.1 = icmp ult i32 %i, %length.1
470  %within.bounds.2 = icmp ult i32 %i, %length.2
471  %within.bounds.3 = icmp ult i32 %i, %length.3
472  %within.bounds.1.and.2 = and i1 %within.bounds.1, %within.bounds.2
473  %within.bounds = and i1 %within.bounds.1.and.2, %within.bounds.3
474  br i1 %within.bounds, label %guarded, label %deopt, !prof !0
475
; Failed range check: call @prevent_merging and return -1.
476deopt:                                            ; preds = %loop
477  call void @prevent_merging()
478  ret i32 -1
479
480guarded:                                          ; preds = %loop
  ; All three arrays are read at the same zero-extended index.
481  %i.i64 = zext i32 %i to i64
482  %array.1.i.ptr = getelementptr inbounds i32, ptr %array.1, i64 %i.i64
483  %array.1.i = load i32, ptr %array.1.i.ptr, align 4
484  %loop.acc.1 = add i32 %loop.acc, %array.1.i
485  %array.2.i.ptr = getelementptr inbounds i32, ptr %array.2, i64 %i.i64
486  %array.2.i = load i32, ptr %array.2.i.ptr, align 4
487  %loop.acc.2 = add i32 %loop.acc.1, %array.2.i
488  %array.3.i.ptr = getelementptr inbounds i32, ptr %array.3, i64 %i.i64
489  %array.3.i = load i32, ptr %array.3.i.ptr, align 4
490  %loop.acc.next = add i32 %loop.acc.2, %array.3.i
  ; Latch: keep looping while the next index is unsigned-less-than %n.
491  %i.next = add nuw i32 %i, 1
492  %continue = icmp ult i32 %i.next, %n
493  br i1 %continue, label %loop, label %exit
494
495exit:                                             ; preds = %guarded
496  %result = phi i32 [ %loop.acc.next, %guarded ]
497  ret i32 %result
498}
499
500; Analogous to the above, but with two distinct branches (on different conditions)
501define i32 @distinct_checks(ptr %array.1, i32 %length.1, ptr %array.2, i32 %length.2, ptr %array.3, i32 %length.3, i32 %n) #0 {
502; CHECK-V8M-LABEL: @distinct_checks(
503; CHECK-V8M-NEXT:  loop.preheader:
504; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
505; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
506; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
507; CHECK-V8M-NEXT:    [[TMP2:%.*]] = freeze i32 [[LENGTH_2:%.*]]
508; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[TMP2]])
509; CHECK-V8M-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]])
510; CHECK-V8M-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]]
511; CHECK-V8M-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]]
512; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
513; CHECK-V8M:       loop:
514; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
515; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ]
516; CHECK-V8M-NEXT:    br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]]
517; CHECK-V8M:       deopt:
518; CHECK-V8M-NEXT:    call void @prevent_merging()
519; CHECK-V8M-NEXT:    ret i32 -1
520; CHECK-V8M:       guarded:
521; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
522; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
523; CHECK-V8M-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
524; CHECK-V8M-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
525; CHECK-V8M-NEXT:    br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]]
526; CHECK-V8M:       deopt2:
527; CHECK-V8M-NEXT:    call void @prevent_merging()
528; CHECK-V8M-NEXT:    ret i32 -1
529; CHECK-V8M:       guarded1:
530; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
531; CHECK-V8M-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
532; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]]
533; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
534; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
535; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
536; CHECK-V8M:       exit:
537; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ]
538; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
539;
540; CHECK-V8A-LABEL: @distinct_checks(
541; CHECK-V8A-NEXT:  loop.preheader:
542; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
543; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
544; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
545; CHECK-V8A-NEXT:    [[TMP2:%.*]] = freeze i32 [[LENGTH_2:%.*]]
546; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[TMP2]])
547; CHECK-V8A-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[LENGTH_1:%.*]])
548; CHECK-V8A-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[LENGTH_1]], [[UMIN1]]
549; CHECK-V8A-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[LENGTH_2]], [[UMIN1]]
550; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
551; CHECK-V8A:       loop:
552; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
553; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ]
554; CHECK-V8A-NEXT:    br i1 [[TMP3]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]]
555; CHECK-V8A:       deopt:
556; CHECK-V8A-NEXT:    call void @prevent_merging()
557; CHECK-V8A-NEXT:    ret i32 -1
558; CHECK-V8A:       guarded:
559; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
560; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
561; CHECK-V8A-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
562; CHECK-V8A-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
563; CHECK-V8A-NEXT:    br i1 [[TMP4]], label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]]
564; CHECK-V8A:       deopt2:
565; CHECK-V8A-NEXT:    call void @prevent_merging()
566; CHECK-V8A-NEXT:    ret i32 -1
567; CHECK-V8A:       guarded1:
568; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
569; CHECK-V8A-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
570; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]]
571; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
572; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
573; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
574; CHECK-V8A:       exit:
575; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ]
576; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
577;
578loop.preheader:                                   ; preds = %entry
579  br label %loop
580
581loop:                                             ; preds = %guarded4, %loop.preheader
582  %loop.acc = phi i32 [ %loop.acc.next, %guarded1 ], [ 0, %loop.preheader ]
583  %i = phi i32 [ %i.next, %guarded1 ], [ 0, %loop.preheader ]
584  %within.bounds.1 = icmp ult i32 %i, %length.1
585  br i1 %within.bounds.1, label %guarded, label %deopt, !prof !0
586
587deopt:                                            ; preds = %loop
588  call void @prevent_merging()
589  ret i32 -1
590
591guarded:                                          ; preds = %loop
592  %i.i64 = zext i32 %i to i64
593  %array.1.i.ptr = getelementptr inbounds i32, ptr %array.1, i64 %i.i64
594  %array.1.i = load i32, ptr %array.1.i.ptr, align 4
595  %loop.acc.1 = add i32 %loop.acc, %array.1.i
596  %within.bounds.2 = icmp ult i32 %i, %length.2
597  br i1 %within.bounds.2, label %guarded1, label %deopt2, !prof !0
598
599deopt2:                                           ; preds = %guarded
600  call void @prevent_merging()
601  ret i32 -1
602
603guarded1:                                         ; preds = %guarded1
604  %array.3.i.ptr = getelementptr inbounds i32, ptr %array.3, i64 %i.i64
605  %array.3.i = load i32, ptr %array.3.i.ptr, align 4
606  %loop.acc.next = add i32 %loop.acc.1, %array.3.i
607  %i.next = add nuw i32 %i, 1
608  %continue = icmp ult i32 %i.next, %n
609  br i1 %continue, label %loop, label %exit
610
611exit:
612  %result = phi i32 [ %loop.acc.next, %guarded1 ]
613  ret i32 %result
614}
615
616define i32 @duplicate_checks(ptr %array.1, ptr %array.2, ptr %array.3, i32 %length, i32 %n) #0 { ; %i is bounds-checked twice against the same %length; %array.2 is unused
617; CHECK-V8M-LABEL: @duplicate_checks(
618; CHECK-V8M-NEXT:  loop.preheader:
619; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
620; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
621; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
622; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
623; CHECK-V8M-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
624; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
625; CHECK-V8M:       loop:
626; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
627; CHECK-V8M-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ]
628; CHECK-V8M-NEXT:    br i1 [[TMP2]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]]
629; CHECK-V8M:       deopt:
630; CHECK-V8M-NEXT:    call void @prevent_merging()
631; CHECK-V8M-NEXT:    ret i32 -1
632; CHECK-V8M:       guarded:
633; CHECK-V8M-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
634; CHECK-V8M-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
635; CHECK-V8M-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
636; CHECK-V8M-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
637; CHECK-V8M-NEXT:    br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]]
638; CHECK-V8M:       deopt2:
639; CHECK-V8M-NEXT:    call void @prevent_merging()
640; CHECK-V8M-NEXT:    ret i32 -1
641; CHECK-V8M:       guarded1:
642; CHECK-V8M-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
643; CHECK-V8M-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
644; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]]
645; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
646; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
647; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
648; CHECK-V8M:       exit:
649; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ]
650; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
651;
652; CHECK-V8A-LABEL: @duplicate_checks(
653; CHECK-V8A-NEXT:  loop.preheader:
654; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N:%.*]], i32 1)
655; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add i32 [[UMAX]], -1
656; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i32 [[TMP0]]
657; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 [[LENGTH:%.*]])
658; CHECK-V8A-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[LENGTH]], [[UMIN]]
659; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
660; CHECK-V8A:       loop:
661; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED1:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
662; CHECK-V8A-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED1]] ], [ 0, [[LOOP_PREHEADER]] ]
663; CHECK-V8A-NEXT:    br i1 [[TMP2]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]]
664; CHECK-V8A:       deopt:
665; CHECK-V8A-NEXT:    call void @prevent_merging()
666; CHECK-V8A-NEXT:    ret i32 -1
667; CHECK-V8A:       guarded:
668; CHECK-V8A-NEXT:    [[I_I64:%.*]] = zext i32 [[I]] to i64
669; CHECK-V8A-NEXT:    [[ARRAY_1_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_1:%.*]], i64 [[I_I64]]
670; CHECK-V8A-NEXT:    [[ARRAY_1_I:%.*]] = load i32, ptr [[ARRAY_1_I_PTR]], align 4
671; CHECK-V8A-NEXT:    [[LOOP_ACC_1:%.*]] = add i32 [[LOOP_ACC]], [[ARRAY_1_I]]
672; CHECK-V8A-NEXT:    br i1 true, label [[GUARDED1]], label [[DEOPT2:%.*]], !prof [[PROF0]]
673; CHECK-V8A:       deopt2:
674; CHECK-V8A-NEXT:    call void @prevent_merging()
675; CHECK-V8A-NEXT:    ret i32 -1
676; CHECK-V8A:       guarded1:
677; CHECK-V8A-NEXT:    [[ARRAY_3_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY_3:%.*]], i64 [[I_I64]]
678; CHECK-V8A-NEXT:    [[ARRAY_3_I:%.*]] = load i32, ptr [[ARRAY_3_I_PTR]], align 4
679; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC_1]], [[ARRAY_3_I]]
680; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw i32 [[I]], 1
681; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]]
682; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
683; CHECK-V8A:       exit:
684; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED1]] ]
685; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
686;
687loop.preheader:                                   ; entry block of the function (no predecessors)
688  br label %loop
689
690loop:                                             ; preds = %guarded1, %loop.preheader
691  %loop.acc = phi i32 [ %loop.acc.next, %guarded1 ], [ 0, %loop.preheader ]
692  %i = phi i32 [ %i.next, %guarded1 ], [ 0, %loop.preheader ]
693  %within.bounds.1 = icmp ult i32 %i, %length ; first bounds check: i <u length
694  br i1 %within.bounds.1, label %guarded, label %deopt, !prof !0
695
696deopt:                                            ; preds = %loop
697  call void @prevent_merging()
698  ret i32 -1
699
700guarded:                                          ; preds = %loop
701  %i.i64 = zext i32 %i to i64
702  %array.1.i.ptr = getelementptr inbounds i32, ptr %array.1, i64 %i.i64
703  %array.1.i = load i32, ptr %array.1.i.ptr, align 4
704  %loop.acc.1 = add i32 %loop.acc, %array.1.i
705  %within.bounds.2 = icmp ult i32 %i, %length ; second check, same operands as %within.bounds.1
706  br i1 %within.bounds.2, label %guarded1, label %deopt2, !prof !0 ; the assertions above expect this condition to fold to true
707
708deopt2:                                           ; preds = %guarded
709  call void @prevent_merging()
710  ret i32 -1
711
712guarded1:                                         ; preds = %guarded
713  %array.3.i.ptr = getelementptr inbounds i32, ptr %array.3, i64 %i.i64
714  %array.3.i = load i32, ptr %array.3.i.ptr, align 4
715  %loop.acc.next = add i32 %loop.acc.1, %array.3.i
716  %i.next = add nuw i32 %i, 1
717  %continue = icmp ult i32 %i.next, %n
718  br i1 %continue, label %loop, label %exit ; back edge: keep looping while i + 1 <u n
719
720exit:                                             ; preds = %guarded1
721  %result = phi i32 [ %loop.acc.next, %guarded1 ]
722  ret i32 %result
723}
724
725; Demonstrate that this approach works with IVs of different steps and types.
726; This version uses a manually LFTR'd (linear-function-test-replace) exit condition to work
727; around an issue described in detail in the next test (NOTE: no such test follows in this file).
728define i32 @different_ivs(ptr %array, i32 %length, i32 %n) #0 {
729; CHECK-V8M-LABEL: @different_ivs(
730; CHECK-V8M-NEXT:  loop.preheader:
731; CHECK-V8M-NEXT:    [[N64:%.*]] = zext i32 [[N:%.*]] to i64
732; CHECK-V8M-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1)
733; CHECK-V8M-NEXT:    [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1
734; CHECK-V8M-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
735; CHECK-V8M-NEXT:    [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64
736; CHECK-V8M-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP2]])
737; CHECK-V8M-NEXT:    [[TMP3:%.*]] = zext i32 [[LENGTH]] to i64
738; CHECK-V8M-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], [[UMIN]]
739; CHECK-V8M-NEXT:    br label [[LOOP:%.*]]
740; CHECK-V8M:       loop:
741; CHECK-V8M-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
742; CHECK-V8M-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
743; CHECK-V8M-NEXT:    br i1 [[TMP4]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
744; CHECK-V8M:       deopt:
745; CHECK-V8M-NEXT:    call void @prevent_merging()
746; CHECK-V8M-NEXT:    ret i32 -1
747; CHECK-V8M:       guarded:
748; CHECK-V8M-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I]]
749; CHECK-V8M-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
750; CHECK-V8M-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
751; CHECK-V8M-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
752; CHECK-V8M-NEXT:    [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]]
753; CHECK-V8M-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
754; CHECK-V8M:       exit:
755; CHECK-V8M-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
756; CHECK-V8M-NEXT:    ret i32 [[RESULT]]
757;
758; CHECK-V8A-LABEL: @different_ivs(
759; CHECK-V8A-NEXT:  loop.preheader:
760; CHECK-V8A-NEXT:    [[N64:%.*]] = zext i32 [[N:%.*]] to i64
761; CHECK-V8A-NEXT:    [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N64]], i64 1)
762; CHECK-V8A-NEXT:    [[TMP0:%.*]] = add nsw i64 [[UMAX]], -1
763; CHECK-V8A-NEXT:    [[TMP1:%.*]] = freeze i64 [[TMP0]]
764; CHECK-V8A-NEXT:    [[TMP2:%.*]] = zext i32 [[LENGTH:%.*]] to i64
765; CHECK-V8A-NEXT:    [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[TMP2]])
766; CHECK-V8A-NEXT:    [[TMP3:%.*]] = zext i32 [[LENGTH]] to i64
767; CHECK-V8A-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], [[UMIN]]
768; CHECK-V8A-NEXT:    br label [[LOOP:%.*]]
769; CHECK-V8A:       loop:
770; CHECK-V8A-NEXT:    [[LOOP_ACC:%.*]] = phi i32 [ [[LOOP_ACC_NEXT:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_PREHEADER:%.*]] ]
771; CHECK-V8A-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ]
772; CHECK-V8A-NEXT:    br i1 [[TMP4]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0]]
773; CHECK-V8A:       deopt:
774; CHECK-V8A-NEXT:    call void @prevent_merging()
775; CHECK-V8A-NEXT:    ret i32 -1
776; CHECK-V8A:       guarded:
777; CHECK-V8A-NEXT:    [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I]]
778; CHECK-V8A-NEXT:    [[ARRAY_I:%.*]] = load i32, ptr [[ARRAY_I_PTR]], align 4
779; CHECK-V8A-NEXT:    [[LOOP_ACC_NEXT]] = add i32 [[LOOP_ACC]], [[ARRAY_I]]
780; CHECK-V8A-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
781; CHECK-V8A-NEXT:    [[CONTINUE:%.*]] = icmp ult i64 [[I_NEXT]], [[N64]]
782; CHECK-V8A-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT:%.*]]
783; CHECK-V8A:       exit:
784; CHECK-V8A-NEXT:    [[RESULT:%.*]] = phi i32 [ [[LOOP_ACC_NEXT]], [[GUARDED]] ]
785; CHECK-V8A-NEXT:    ret i32 [[RESULT]]
786;
787loop.preheader:                                   ; entry block of the function (no predecessors)
788  %j.start = sub nuw nsw i32 %length, 1 ; %j starts at length - 1 and is decremented each iteration
789  %n64 = zext i32 %n to i64 ; widen %n so it can be compared with the i64 IV %i
790  br label %loop
791
792loop:                                             ; preds = %guarded, %loop.preheader
793  %loop.acc = phi i32 [ %loop.acc.next, %guarded ], [ 0, %loop.preheader ]
794  %i = phi i64 [ %i.next, %guarded ], [ 0, %loop.preheader ] ; i64 IV, step +1
795  %j = phi i32 [ %j.next, %guarded ], [ %j.start, %loop.preheader ] ; i32 IV, step -1
796  %within.bounds = icmp ne i32 %j, -1 ; guard tests the i32 down-counter %j, not the i64 up-counter %i
797  br i1 %within.bounds, label %guarded, label %deopt, !prof !0 ; the assertions above expect a loop-invariant condition here
798
799deopt:                                            ; preds = %loop
800  call void @prevent_merging()
801  ret i32 -1
802
803guarded:                                          ; preds = %loop
804  %array.i.ptr = getelementptr inbounds i32, ptr %array, i64 %i
805  %array.i = load i32, ptr %array.i.ptr, align 4
806  %loop.acc.next = add i32 %loop.acc, %array.i
807  %i.next = add nuw i64 %i, 1
808  %j.next = sub nuw i32 %j, 1
809  %continue = icmp ult i64 %i.next, %n64 ; exit test is written on the i64 IV against the zero-extended %n
810  br i1 %continue, label %loop, label %exit
811
812exit:                                             ; preds = %guarded
813  %result = phi i32 [ %loop.acc.next, %guarded ]
814  ret i32 %result
815}
816
817declare void @prevent_merging()                   ; opaque callee invoked from the deopt blocks of the tests above
818declare void @call()
819
820!0 = !{!"branch_weights", i32 1048576, i32 1}     ; used as !prof on the guard branches: weight 1048576 for the first successor, 1 for the second
821!1 = !{i32 1, i32 -2147483648}
822!2 = !{i32 0, i32 50}
823
824attributes #0 = { minsize optsize }               ; attribute group referenced as #0 by the test functions
825