xref: /llvm-project/llvm/test/Transforms/LoopFusion/guarded.ll (revision 055fb7795aa219a3d274d280ec9129784f169f56)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -passes=loop-fusion < %s | FileCheck %s
3
4@B = common global [1024 x i32] zeroinitializer, align 16  ; destination array of the second loop in @dep_free_parametric
5
; Two guarded loops (bb5 and bb9) whose guards and latches all compare against
; %N; the first stores through %A, the second into @B. The expected output
; below holds both loop bodies in a single loop in bb5.
6define void @dep_free_parametric(ptr noalias %A, i64 %N) {
7; CHECK-LABEL: @dep_free_parametric(
8; CHECK-NEXT:  entry:
9; CHECK-NEXT:    [[CMP4:%.*]] = icmp slt i64 0, [[N:%.*]]
10; CHECK-NEXT:    [[CMP31:%.*]] = icmp slt i64 0, [[N]]
11; CHECK-NEXT:    br i1 [[CMP4]], label [[BB3:%.*]], label [[BB12:%.*]]
12; CHECK:       bb3:
13; CHECK-NEXT:    br label [[BB5:%.*]]
14; CHECK:       bb5:
15; CHECK-NEXT:    [[I_05:%.*]] = phi i64 [ [[INC:%.*]], [[BB5]] ], [ 0, [[BB3]] ]
16; CHECK-NEXT:    [[I1_02:%.*]] = phi i64 [ [[INC14:%.*]], [[BB5]] ], [ 0, [[BB3]] ]
17; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i64 [[I_05]], 3
18; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[I_05]], 3
19; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[SUB]], [[ADD]]
20; CHECK-NEXT:    [[REM:%.*]] = srem i64 [[MUL]], [[I_05]]
21; CHECK-NEXT:    [[CONV:%.*]] = trunc i64 [[REM]] to i32
22; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_05]]
23; CHECK-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX]], align 4
24; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_05]], 1
25; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
26; CHECK-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[I1_02]], 3
27; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i64 [[I1_02]], 3
28; CHECK-NEXT:    [[MUL9:%.*]] = mul nsw i64 [[SUB7]], [[ADD8]]
29; CHECK-NEXT:    [[REM10:%.*]] = srem i64 [[MUL9]], [[I1_02]]
30; CHECK-NEXT:    [[CONV11:%.*]] = trunc i64 [[REM10]] to i32
31; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 [[I1_02]]
32; CHECK-NEXT:    store i32 [[CONV11]], ptr [[ARRAYIDX12]], align 4
33; CHECK-NEXT:    [[INC14]] = add nsw i64 [[I1_02]], 1
34; CHECK-NEXT:    [[CMP3:%.*]] = icmp slt i64 [[INC14]], [[N]]
35; CHECK-NEXT:    br i1 [[CMP3]], label [[BB5]], label [[BB15:%.*]]
36; CHECK:       bb15:
37; CHECK-NEXT:    br label [[BB12]]
38; CHECK:       bb12:
39; CHECK-NEXT:    ret void
40;
41entry:
42  %cmp4 = icmp slt i64 0, %N  ; guard of the first loop: taken only when 0 < N
43  br i1 %cmp4, label %bb3, label %bb14
44
45bb3:                               ; preds = %entry
46  br label %bb5
47
48bb5:                                         ; preds = %bb3, %bb5
49  %i.05 = phi i64 [ %inc, %bb5 ], [ 0, %bb3 ]
50  %sub = sub nsw i64 %i.05, 3
51  %add = add nsw i64 %i.05, 3
52  %mul = mul nsw i64 %sub, %add
53  %rem = srem i64 %mul, %i.05
54  %conv = trunc i64 %rem to i32
55  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.05
56  store i32 %conv, ptr %arrayidx, align 4  ; A[i] = trunc(((i - 3) * (i + 3)) srem i)
57  %inc = add nsw i64 %i.05, 1
58  %cmp = icmp slt i64 %inc, %N
59  br i1 %cmp, label %bb5, label %bb10
60
61bb10:                                 ; preds = %bb5
62  br label %bb14
63
64bb14:                                          ; preds = %bb10, %entry
65  %cmp31 = icmp slt i64 0, %N  ; guard of the second loop: same comparison as %cmp4
66  br i1 %cmp31, label %bb8, label %bb12
67
68bb8:                              ; preds = %bb14
69  br label %bb9
70
71bb9:                                        ; preds = %bb8, %bb9
72  %i1.02 = phi i64 [ %inc14, %bb9 ], [ 0, %bb8 ]
73  %sub7 = sub nsw i64 %i1.02, 3
74  %add8 = add nsw i64 %i1.02, 3
75  %mul9 = mul nsw i64 %sub7, %add8
76  %rem10 = srem i64 %mul9, %i1.02
77  %conv11 = trunc i64 %rem10 to i32
78  %arrayidx12 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.02
79  store i32 %conv11, ptr %arrayidx12, align 4  ; B[i1] = same expression, computed from %i1.02
80  %inc14 = add nsw i64 %i1.02, 1
81  %cmp3 = icmp slt i64 %inc14, %N
82  br i1 %cmp3, label %bb9, label %bb15
83
84bb15:                               ; preds = %bb9
85  br label %bb12
86
87bb12:                                        ; preds = %bb15, %bb14
88  ret void
89}
90
91; Test that `%add` is moved from for.second.preheader into for.first.preheader,
92; and that the two loops for.first and for.second are fused.
93
94define void @moveinsts_preheader(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
95; CHECK-LABEL: @moveinsts_preheader(
96; CHECK-NEXT:  for.first.guard:
97; CHECK-NEXT:    [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
98; CHECK-NEXT:    br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
99; CHECK:       for.first.preheader:
100; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
101; CHECK-NEXT:    br label [[FOR_FIRST:%.*]]
102; CHECK:       for.first:
103; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[INC_I:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
104; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[INC_J:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
105; CHECK-NEXT:    [[AI:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I]]
106; CHECK-NEXT:    store i32 0, ptr [[AI]], align 4
107; CHECK-NEXT:    [[INC_I]] = add nsw i64 [[I]], 1
108; CHECK-NEXT:    [[CMP_I:%.*]] = icmp slt i64 [[INC_I]], [[N]]
109; CHECK-NEXT:    [[BJ:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J]]
110; CHECK-NEXT:    store i32 0, ptr [[BJ]], align 4
111; CHECK-NEXT:    [[INC_J]] = add nsw i64 [[J]], 1
112; CHECK-NEXT:    [[CMP_J:%.*]] = icmp slt i64 [[INC_J]], [[N]]
113; CHECK-NEXT:    br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
114; CHECK:       for.second.exit:
115; CHECK-NEXT:    br label [[FOR_END]]
116; CHECK:       for.end:
117; CHECK-NEXT:    ret void
118;
119for.first.guard:
120  %cmp.guard = icmp slt i64 0, %N  ; shared guard condition: 0 < N
121  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
122
123for.first.preheader:  ; preds = %for.first.guard
124  br label %for.first
125
126for.first:  ; preds = %for.first.preheader, %for.first
127  %i = phi i64 [ %inc.i, %for.first ], [ 0, %for.first.preheader ]
128  %Ai = getelementptr inbounds i32, ptr %A, i64 %i
129  store i32 0, ptr %Ai, align 4  ; A[i] = 0
130  %inc.i = add nsw i64 %i, 1
131  %cmp.i = icmp slt i64 %inc.i, %N
132  br i1 %cmp.i, label %for.first, label %for.first.exit
133
134for.first.exit:  ; preds = %for.first
135  br label %for.second.guard
136
137for.second.guard:  ; preds = %for.first.exit, %for.first.guard
138  br i1 %cmp.guard, label %for.second.preheader, label %for.end  ; reuses %cmp.guard from for.first.guard
139
140for.second.preheader:  ; preds = %for.second.guard
141  %add = add nsw i32 %x, 1  ; no users in this function; the expected output has it in for.first.preheader
142  br label %for.second
143
144for.second:  ; preds = %for.second.preheader, %for.second
145  %j = phi i64 [ %inc.j, %for.second ], [ 0, %for.second.preheader ]
146  %Bj = getelementptr inbounds i32, ptr %B, i64 %j
147  store i32 0, ptr %Bj, align 4  ; B[j] = 0
148  %inc.j = add nsw i64 %j, 1
149  %cmp.j = icmp slt i64 %inc.j, %N
150  br i1 %cmp.j, label %for.second, label %for.second.exit
151
152for.second.exit:  ; preds = %for.second
153  br label %for.end
154
155for.end:  ; preds = %for.second.exit, %for.second.guard
156  ret void
157}
158
159; Test that `%add` is moved from for.first.exit into for.second.exit, and that
160; the two loops for.first and for.second are fused.
161
162define void @moveinsts_exitblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
163; CHECK-LABEL: @moveinsts_exitblock(
164; CHECK-NEXT:  for.first.guard:
165; CHECK-NEXT:    [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
166; CHECK-NEXT:    br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
167; CHECK:       for.first.preheader:
168; CHECK-NEXT:    br label [[FOR_FIRST:%.*]]
169; CHECK:       for.first:
170; CHECK-NEXT:    [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
171; CHECK-NEXT:    [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
172; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
173; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX]], align 4
174; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_04]], 1
175; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
176; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
177; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
178; CHECK-NEXT:    [[INC6]] = add nsw i64 [[J_02]], 1
179; CHECK-NEXT:    [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
180; CHECK-NEXT:    br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
181; CHECK:       for.second.exit:
182; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
183; CHECK-NEXT:    br label [[FOR_END]]
184; CHECK:       for.end:
185; CHECK-NEXT:    ret void
186;
187for.first.guard:
188  %cmp.guard = icmp slt i64 0, %N  ; shared guard condition: 0 < N
189  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
190
191for.first.preheader:  ; preds = %for.first.guard
192  br label %for.first
193
194for.first:  ; preds = %for.first.preheader, %for.first
195  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
196  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
197  store i32 0, ptr %arrayidx, align 4  ; A[i] = 0
198  %inc = add nsw i64 %i.04, 1
199  %cmp = icmp slt i64 %inc, %N
200  br i1 %cmp, label %for.first, label %for.first.exit
201
202for.first.exit:  ; preds = %for.first
203  %add = add nsw i32 %x, 1  ; no users in this function; the expected output has it in for.second.exit
204  br label %for.second.guard
205
206for.second.guard:  ; preds = %for.first.exit, %for.first.guard
207  br i1 %cmp.guard, label %for.second.preheader, label %for.end  ; reuses %cmp.guard from for.first.guard
208
209for.second.preheader:  ; preds = %for.second.guard
210  br label %for.second
211
212for.second:  ; preds = %for.second.preheader, %for.second
213  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
214  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
215  store i32 0, ptr %arrayidx4, align 4  ; B[j] = 0
216  %inc6 = add nsw i64 %j.02, 1
217  %cmp.j = icmp slt i64 %inc6, %N
218  br i1 %cmp.j, label %for.second, label %for.second.exit
219
220for.second.exit:  ; preds = %for.second
221  br label %for.end
222
223for.end:  ; preds = %for.second.exit, %for.second.guard
224  ret void
225}
226
227; Test that `%add` is moved from for.second.guard into for.first.guard, and
228; that the two loops for.first and for.second are fused.
229
230define void @moveinsts_guardblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
231; CHECK-LABEL: @moveinsts_guardblock(
232; CHECK-NEXT:  for.first.guard:
233; CHECK-NEXT:    [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
234; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1
235; CHECK-NEXT:    br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
236; CHECK:       for.first.preheader:
237; CHECK-NEXT:    br label [[FOR_FIRST:%.*]]
238; CHECK:       for.first:
239; CHECK-NEXT:    [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
240; CHECK-NEXT:    [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
241; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
242; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX]], align 4
243; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_04]], 1
244; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
245; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
246; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
247; CHECK-NEXT:    [[INC6]] = add nsw i64 [[J_02]], 1
248; CHECK-NEXT:    [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
249; CHECK-NEXT:    br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
250; CHECK:       for.second.exit:
251; CHECK-NEXT:    br label [[FOR_END]]
252; CHECK:       for.end:
253; CHECK-NEXT:    ret void
254;
255for.first.guard:
256  %cmp.guard = icmp slt i64 0, %N  ; shared guard condition: 0 < N
257  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
258
259for.first.preheader:  ; preds = %for.first.guard
260  br label %for.first
261
262for.first:  ; preds = %for.first.preheader, %for.first
263  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
264  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
265  store i32 0, ptr %arrayidx, align 4  ; A[i] = 0
266  %inc = add nsw i64 %i.04, 1
267  %cmp = icmp slt i64 %inc, %N
268  br i1 %cmp, label %for.first, label %for.first.exit
269
270for.first.exit:  ; preds = %for.first
271  br label %for.second.guard
272
273for.second.guard:  ; preds = %for.first.exit, %for.first.guard
274  %add = add nsw i32 %x, 1  ; no users in this function; the expected output has it in for.first.guard
275  br i1 %cmp.guard, label %for.second.preheader, label %for.end  ; reuses %cmp.guard from for.first.guard
276
277for.second.preheader:  ; preds = %for.second.guard
278  br label %for.second
279
280for.second:  ; preds = %for.second.preheader, %for.second
281  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
282  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
283  store i32 0, ptr %arrayidx4, align 4  ; B[j] = 0
284  %inc6 = add nsw i64 %j.02, 1
285  %cmp.j = icmp slt i64 %inc6, %N
286  br i1 %cmp.j, label %for.second, label %for.second.exit
287
288for.second.exit:  ; preds = %for.second
289  br label %for.end
290
291for.end:  ; preds = %for.second.exit, %for.second.guard
292  ret void
293}
294
295; Test that the incoming block of `%j.lcssa` is updated correctly
296; from for.second.guard to for.first.guard, and the two loops for.first and
297; for.second are fused.
298
299define i64 @updatephi_guardnonloopblock(ptr noalias %A, ptr noalias %B, i64 %N, i32 %x) {
300; CHECK-LABEL: @updatephi_guardnonloopblock(
301; CHECK-NEXT:  for.first.guard:
302; CHECK-NEXT:    [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]]
303; CHECK-NEXT:    br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]]
304; CHECK:       for.first.preheader:
305; CHECK-NEXT:    br label [[FOR_FIRST:%.*]]
306; CHECK:       for.first:
307; CHECK-NEXT:    [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
308; CHECK-NEXT:    [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ]
309; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_04]]
310; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX]], align 4
311; CHECK-NEXT:    [[INC]] = add nsw i64 [[I_04]], 1
312; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]]
313; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[J_02]]
314; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX4]], align 4
315; CHECK-NEXT:    [[INC6]] = add nsw i64 [[J_02]], 1
316; CHECK-NEXT:    [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]]
317; CHECK-NEXT:    br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]]
318; CHECK:       for.second.exit:
319; CHECK-NEXT:    br label [[FOR_END]]
320; CHECK:       for.end:
321; CHECK-NEXT:    [[J_LCSSA:%.*]] = phi i64 [ 0, [[FOR_FIRST_GUARD:%.*]] ], [ [[J_02]], [[FOR_SECOND_EXIT]] ]
322; CHECK-NEXT:    ret i64 [[J_LCSSA]]
323;
324for.first.guard:
325  %cmp.guard = icmp slt i64 0, %N  ; shared guard condition: 0 < N
326  br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard
327
328for.first.preheader:  ; preds = %for.first.guard
329  br label %for.first
330
331for.first:  ; preds = %for.first.preheader, %for.first
332  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
333  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.04
334  store i32 0, ptr %arrayidx, align 4  ; A[i] = 0
335  %inc = add nsw i64 %i.04, 1
336  %cmp = icmp slt i64 %inc, %N
337  br i1 %cmp, label %for.first, label %for.first.exit
338
339for.first.exit:  ; preds = %for.first
340  br label %for.second.guard
341
342for.second.guard:  ; preds = %for.first.exit, %for.first.guard
343  br i1 %cmp.guard, label %for.second.preheader, label %for.end  ; reuses %cmp.guard from for.first.guard
344
345for.second.preheader:  ; preds = %for.second.guard
346  br label %for.second
347
348for.second:  ; preds = %for.second.preheader, %for.second
349  %j.02 = phi i64 [ %inc6, %for.second ], [ 0, %for.second.preheader ]
350  %arrayidx4 = getelementptr inbounds i32, ptr %B, i64 %j.02
351  store i32 0, ptr %arrayidx4, align 4  ; B[j] = 0
352  %inc6 = add nsw i64 %j.02, 1
353  %cmp.j = icmp slt i64 %inc6, %N
354  br i1 %cmp.j, label %for.second, label %for.second.exit
355
356for.second.exit:  ; preds = %for.second
357  br label %for.end
358
359for.end:  ; preds = %for.second.exit, %for.second.guard
360  %j.lcssa = phi i64 [ 0, %for.second.guard ], [ %j.02, %for.second.exit ]  ; the expected output names for.first.guard as the incoming block for the 0 value
361  ret i64 %j.lcssa
362}
363
; Two empty single-block loops with constant branch conditions. The expected
; output below keeps for.1 and for.2 as separate loops and only adds
; preheader / loop-exit blocks.
; NOTE(review): the name suggests a regression test for PR59024 - confirm
; against the bug tracker.
364define void @pr59024() {
365; CHECK-LABEL: @pr59024(
366; CHECK-NEXT:  entry:
367; CHECK-NEXT:    br i1 false, label [[FOR_2_PREHEADER:%.*]], label [[FOR_1_PREHEADER:%.*]]
368; CHECK:       for.1.preheader:
369; CHECK-NEXT:    br label [[FOR_1:%.*]]
370; CHECK:       for.1:
371; CHECK-NEXT:    br i1 true, label [[FOR_2_PREHEADER_LOOPEXIT:%.*]], label [[FOR_1]]
372; CHECK:       for.2.preheader.loopexit:
373; CHECK-NEXT:    br label [[FOR_2_PREHEADER]]
374; CHECK:       for.2.preheader:
375; CHECK-NEXT:    br label [[FOR_2:%.*]]
376; CHECK:       for.2:
377; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[FOR_2]]
378; CHECK:       exit:
379; CHECK-NEXT:    ret void
380;
381entry:
382  br i1 false, label %for.2, label %for.1
383
384for.1:                                        ; preds = %for.1, %entry
385  br i1 true, label %for.2, label %for.1
386
387for.2:                                       ; preds = %for.2, %for.1, %entry
388  br i1 true, label %exit, label %for.2
389
390exit:                                          ; preds = %for.2
391  ret void
392}
393