xref: /llvm-project/llvm/test/Transforms/LICM/loopsink.ll (revision e3cf80c5c1fe55efd8216575ccadea0ab087e79c)
1; RUN: opt -S -verify-memoryssa -passes=loop-sink < %s | FileCheck %s
2; RUN: opt -S -verify-memoryssa -aa-pipeline=basic-aa -passes=loop-sink < %s | FileCheck %s
3
4@g = global i32 0, align 4
5
6;     b1
7;    /  \
8;   b2  b6
9;  /  \  |
10; b3  b4 |
11;  \  /  |
12;   b5   |
13;    \  /
14;     b7
15; preheader: 1000
16; b2: 15
17; b3: 7
18; b4: 7
19; Sink load to b2
20; CHECK: t1
21; CHECK: .b2:
22; CHECK: load i32, ptr @g
23; CHECK: .b3:
24; CHECK-NOT:  load i32, ptr @g
; Test input @t1: %invariant is loaded from @g in .preheader and is used only
; in .b3 and .b4, the two successors of .b2.
; The two unnamed arguments are %0 and %1 and the unnamed entry block takes %2,
; which is why the first unnamed instruction is %3.
25define i32 @t1(i32, i32) #0 !prof !0 {
26  %3 = icmp eq i32 %1, 0
27  br i1 %3, label %.exit, label %.preheader
28
29.preheader:
  ; The load under test; every user of %invariant is inside the loop.
30  %invariant = load i32, ptr @g
31  br label %.b1
32
33.b1:
  ; Loop header: %iv is 0 on entry from .preheader and %t7 on the edge from .b7.
34  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
35  %c1 = icmp sgt i32 %iv, %0
  ; !1 is branch_weights 1:2000, so the .b2 successor is the low-weight side.
36  br i1 %c1, label %.b2, label %.b6, !prof !1
37
38.b2:
39  %c2 = icmp sgt i32 %iv, 1
  ; No !prof metadata on this branch.
40  br i1 %c2, label %.b3, label %.b4
41
42.b3:
  ; First user of %invariant.
43  %t3 = sub nsw i32 %invariant, %iv
44  br label %.b5
45
46.b4:
  ; Second user of %invariant.
47  %t4 = add nsw i32 %invariant, %iv
48  br label %.b5
49
50.b5:
51  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
52  %t5 = mul nsw i32 %p5, 5
53  br label %.b7
54
55.b6:
  ; This path does not use %invariant.
56  %t6 = add nsw i32 %iv, 100
57  br label %.b7
58
59.b7:
60  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
61  %t7 = add nuw nsw i32 %iv, 1
62  %c7 = icmp eq i32 %t7, %p7
  ; Loop latch: !3 is branch_weights 100:1 in favour of the edge back to .b1.
63  br i1 %c7, label %.b1, label %.exit, !prof !3
64
65.exit:
66  ret i32 10
67}
68
69;     b1
70;    /  \
71;   b2  b6
72;  /  \  |
73; b3  b4 |
74;  \  /  |
75;   b5   |
76;    \  /
77;     b7
78; preheader: 500
79; b1: 16016
80; b3: 8
81; b6: 8
82; Sink load to b3 and b6
83; CHECK: t2
84; CHECK: .preheader:
85; CHECK-NOT: load i32, ptr @g
86; CHECK: .b3:
87; CHECK: load i32, ptr @g
88; CHECK: .b4:
89; CHECK: .b6:
90; CHECK: load i32, ptr @g
91; CHECK: .b7:
; Test input @t2: %invariant is loaded from @g in .preheader and is used in
; .b3 and in .b6, which lie on opposite sides of the branch in .b1.
; The two unnamed arguments are %0 and %1 and the unnamed entry block takes %2,
; which is why the first unnamed instruction is %3.
92define i32 @t2(i32, i32) #0 !prof !0 {
93  %3 = icmp eq i32 %1, 0
94  br i1 %3, label %.exit, label %.preheader
95
96.preheader:
  ; The load under test; every user of %invariant is inside the loop.
97  %invariant = load i32, ptr @g
98  br label %.b1
99
100.b1:
  ; Loop header: %iv is 0 on entry from .preheader and %t7 on the edge from .b7.
101  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
102  %c1 = icmp sgt i32 %iv, %0
  ; !2 is branch_weights 2000:1, so .b6 is the low-weight successor.
103  br i1 %c1, label %.b2, label %.b6, !prof !2
104
105.b2:
106  %c2 = icmp sgt i32 %iv, 1
  ; !1 is branch_weights 1:2000, so .b3 is the low-weight successor.
107  br i1 %c2, label %.b3, label %.b4, !prof !1
108
109.b3:
  ; First user of %invariant.
110  %t3 = sub nsw i32 %invariant, %iv
111  br label %.b5
112
113.b4:
  ; This block does not use %invariant.
114  %t4 = add nsw i32 5, %iv
115  br label %.b5
116
117.b5:
118  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
119  %t5 = mul nsw i32 %p5, 5
120  br label %.b7
121
122.b6:
  ; Second user of %invariant.
123  %t6 = add nsw i32 %iv, %invariant
124  br label %.b7
125
126.b7:
127  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
128  %t7 = add nuw nsw i32 %iv, 1
129  %c7 = icmp eq i32 %t7, %p7
  ; Loop latch: !3 is branch_weights 100:1 in favour of the edge back to .b1.
130  br i1 %c7, label %.b1, label %.exit, !prof !3
131
132.exit:
133  ret i32 10
134}
135
136;     b1
137;    /  \
138;   b2  b6
139;  /  \  |
140; b3  b4 |
141;  \  /  |
142;   b5   |
143;    \  /
144;     b7
145; preheader: 500
146; b3: 8
147; b5: 16008
148; Do not sink load from preheader.
149; CHECK: t3
150; CHECK: .preheader:
151; CHECK: load i32, ptr @g
152; CHECK: .b1:
153; CHECK-NOT: load i32, ptr @g
; Test input @t3: same CFG and branch weights as @t2, but %invariant is used in
; .b3 and in .b5 (instead of .b6). .b5 is reached from both .b3 and .b4.
; The two unnamed arguments are %0 and %1 and the unnamed entry block takes %2,
; which is why the first unnamed instruction is %3.
154define i32 @t3(i32, i32) #0 !prof !0 {
155  %3 = icmp eq i32 %1, 0
156  br i1 %3, label %.exit, label %.preheader
157
158.preheader:
  ; The load under test; every user of %invariant is inside the loop.
159  %invariant = load i32, ptr @g
160  br label %.b1
161
162.b1:
  ; Loop header: %iv is 0 on entry from .preheader and %t7 on the edge from .b7.
163  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
164  %c1 = icmp sgt i32 %iv, %0
  ; !2 is branch_weights 2000:1, so .b2 is the high-weight successor.
165  br i1 %c1, label %.b2, label %.b6, !prof !2
166
167.b2:
168  %c2 = icmp sgt i32 %iv, 1
  ; !1 is branch_weights 1:2000, so .b3 is the low-weight successor.
169  br i1 %c2, label %.b3, label %.b4, !prof !1
170
171.b3:
  ; First user of %invariant.
172  %t3 = sub nsw i32 %invariant, %iv
173  br label %.b5
174
175.b4:
176  %t4 = add nsw i32 5, %iv
177  br label %.b5
178
179.b5:
180  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
  ; Second user of %invariant, in the join block of .b3 and .b4.
181  %t5 = mul nsw i32 %p5, %invariant
182  br label %.b7
183
184.b6:
  ; This path does not use %invariant.
185  %t6 = add nsw i32 %iv, 5
186  br label %.b7
187
188.b7:
189  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
190  %t7 = add nuw nsw i32 %iv, 1
191  %c7 = icmp eq i32 %t7, %p7
  ; Loop latch: !3 is branch_weights 100:1 in favour of the edge back to .b1.
192  br i1 %c7, label %.b1, label %.exit, !prof !3
193
194.exit:
195  ret i32 10
196}
197
198; For a two-block loop (.header/.body) with <=1 avg trip count, sink load to .body
199; CHECK: t4
200; CHECK: .header:
201; CHECK-NOT: load i32, ptr @g
202; CHECK: .body:
203; CHECK: load i32, ptr @g
204; CHECK: .exit:
; Test input @t4: a two-block loop (.header and .body). %invariant is loaded
; from @g in .entry and its only user is %t1 in .body.
205define i32 @t4(i32, i32) #0 !prof !0 {
206.entry:
  ; The load under test.
207  %invariant = load i32, ptr @g
208  br label %.header
209
210.header:
  ; %iv is 0 on entry from .entry and %t1 on the edge from .body.
211  %iv = phi i32 [ %t1, %.body ], [ 0, %.entry ]
212  %c0 = icmp sgt i32 %iv, %0
  ; !1 is branch_weights 1:2000, so .body is the low-weight successor.
213  br i1 %c0, label %.body, label %.exit, !prof !1
214
215.body:
216  %t1 = add nsw i32 %invariant, %iv
  ; %c1 has no user in this function; .body branches unconditionally.
217  %c1 = icmp sgt i32 %iv, %0
218  br label %.header
219
220.exit:
221  ret i32 10
222}
223
224;     b1
225;    /  \
226;   b2  b6
227;  /  \  |
228; b3  b4 |
229;  \  /  |
230;   b5   |
231;    \  /
232;     b7
233; preheader: 1000
234; b2: 15
235; b3: 7
236; b4: 7
237; There is a possibly aliasing write in the loop (the call to @foo in b6), so do not sink the load
238; CHECK: t5
239; CHECK: .preheader:
240; CHECK: load i32, ptr @g
241; CHECK: .b1:
242; CHECK-NOT: load i32, ptr @g
; Test input @t5: same shape as @t1 (load of @g in .preheader, used in .b3 and
; .b4), except that .b6 calls @foo, which is declared without any attributes.
; The second argument (%1, a ptr) is not referenced in the body; the entry
; comparison uses %0. The unnamed entry block takes %2, hence %3 below.
243define i32 @t5(i32, ptr) #0 !prof !0 {
244  %3 = icmp eq i32 %0, 0
245  br i1 %3, label %.exit, label %.preheader
246
247.preheader:
  ; The load under test; it reads the non-constant global @g.
248  %invariant = load i32, ptr @g
249  br label %.b1
250
251.b1:
  ; Loop header: %iv is 0 on entry from .preheader and %t7 on the edge from .b7.
252  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
253  %c1 = icmp sgt i32 %iv, %0
  ; !1 is branch_weights 1:2000, so the .b2 successor is the low-weight side.
254  br i1 %c1, label %.b2, label %.b6, !prof !1
255
256.b2:
257  %c2 = icmp sgt i32 %iv, 1
258  br i1 %c2, label %.b3, label %.b4
259
260.b3:
  ; First user of %invariant.
261  %t3 = sub nsw i32 %invariant, %iv
262  br label %.b5
263
264.b4:
  ; Second user of %invariant.
265  %t4 = add nsw i32 %invariant, %iv
266  br label %.b5
267
268.b5:
269  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
270  %t5 = mul nsw i32 %p5, 5
271  br label %.b7
272
273.b6:
  ; Call inside the loop to a function with no memory attributes on its
  ; declaration; this is the only difference from the loop body of @t1.
274  %t6 = call i32 @foo()
275  br label %.b7
276
277.b7:
278  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
279  %t7 = add nuw nsw i32 %iv, 1
280  %c7 = icmp eq i32 %t7, %p7
  ; Loop latch: !3 is branch_weights 100:1 in favour of the edge back to .b1.
281  br i1 %c7, label %.b1, label %.exit, !prof !3
282
283.exit:
284  ret i32 10
285}
286
287;     b1
288;    /  \
289;   b2  b6
290;  /  \  |
291; b3  b4 |
292;  \  /  |
293;   b5   |
294;    \  /
295;     b7
296; preheader: 1000
297; b2: 15
298; b3: 7
299; b4: 7
300; Regardless of the possibly aliasing write in the loop (the call to @foo), this load from constant memory can be sunk.
301; CHECK: t5_const_memory
302; CHECK: .preheader:
303; CHECK-NOT: load i32, ptr @g_const
304; CHECK: .b2:
305; CHECK: load i32, ptr @g_const
306; CHECK: br i1 %c2, label %.b3, label %.b4
; Test input @t5_const_memory: identical to @t5 except that the load in
; .preheader reads @g_const, which is declared as a `constant` global, instead
; of @g. The call to @foo in .b6 is still present.
; The second argument (%1, a ptr) is not referenced in the body; the entry
; comparison uses %0. The unnamed entry block takes %2, hence %3 below.
307define i32 @t5_const_memory(i32, ptr) #0 !prof !0 {
308  %3 = icmp eq i32 %0, 0
309  br i1 %3, label %.exit, label %.preheader
310
311.preheader:
  ; The load under test; it reads the constant global @g_const.
312  %invariant = load i32, ptr @g_const
313  br label %.b1
314
315.b1:
  ; Loop header: %iv is 0 on entry from .preheader and %t7 on the edge from .b7.
316  %iv = phi i32 [ %t7, %.b7 ], [ 0, %.preheader ]
317  %c1 = icmp sgt i32 %iv, %0
  ; !1 is branch_weights 1:2000, so the .b2 successor is the low-weight side.
318  br i1 %c1, label %.b2, label %.b6, !prof !1
319
320.b2:
321  %c2 = icmp sgt i32 %iv, 1
  ; This exact branch text is matched by the FileCheck directives above the
  ; function, so the names %c2, .b3 and .b4 must stay as they are.
322  br i1 %c2, label %.b3, label %.b4
323
324.b3:
  ; First user of %invariant.
325  %t3 = sub nsw i32 %invariant, %iv
326  br label %.b5
327
328.b4:
  ; Second user of %invariant.
329  %t4 = add nsw i32 %invariant, %iv
330  br label %.b5
331
332.b5:
333  %p5 = phi i32 [ %t3, %.b3 ], [ %t4, %.b4 ]
334  %t5 = mul nsw i32 %p5, 5
335  br label %.b7
336
337.b6:
  ; Call inside the loop to a function with no memory attributes on its
  ; declaration.
338  %t6 = call i32 @foo()
339  br label %.b7
340
341.b7:
342  %p7 = phi i32 [ %t6, %.b6 ], [ %t5, %.b5 ]
343  %t7 = add nuw nsw i32 %iv, 1
344  %c7 = icmp eq i32 %t7, %p7
  ; Loop latch: !3 is branch_weights 100:1 in favour of the edge back to .b1.
345  br i1 %c7, label %.b1, label %.exit, !prof !3
346
347.exit:
348  ret i32 10
349}
350
351;     b1
352;    /  \
353;   b2  b3
354;    \  /
355;     b4
356; preheader: 1000
357; b2: 15
358; b3: 7
359; Do not sink unordered atomic load to b2
360; CHECK: t6
361; CHECK: .preheader:
362; CHECK:  load atomic i32, ptr @g unordered, align 4
363; CHECK: .b2:
364; CHECK-NOT: load atomic i32, ptr @g unordered, align 4
; Test input @t6: a diamond loop (.b1 -> .b2 / .b3 -> .b4). %invariant is an
; unordered atomic load of the non-constant global @g in .preheader, and its
; only user is %t1 in .b2.
; The two unnamed arguments are %0 and %1 and the unnamed entry block takes %2,
; which is why the first unnamed instruction is %3.
; NOTE(review): @t6 and @t7 have the same CFG and the same !prof annotations
; (they differ only in the global that is loaded), yet the block-frequency
; comments above the two functions list different numbers -- confirm which set
; is intended.
365define i32 @t6(i32, i32) #0 !prof !0 {
366  %3 = icmp eq i32 %1, 0
367  br i1 %3, label %.exit, label %.preheader
368
369.preheader:
  ; The load under test: atomic, unordered, from @g.
370  %invariant = load atomic i32, ptr @g unordered, align 4
371  br label %.b1
372
373.b1:
  ; Loop header: %iv is 0 on entry from .preheader and %t3 on the edge from .b4.
374  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
375  %c1 = icmp sgt i32 %iv, %0
  ; !1 is branch_weights 1:2000, so .b2 is the low-weight successor.
376  br i1 %c1, label %.b2, label %.b3, !prof !1
377
378.b2:
  ; Only user of %invariant.
379  %t1 = add nsw i32 %invariant, %iv
380  br label %.b4
381
382.b3:
383  %t2 = add nsw i32 %iv, 100
384  br label %.b4
385
386.b4:
387  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
388  %t3 = add nuw nsw i32 %iv, 1
389  %c2 = icmp eq i32 %t3, %p1
  ; Loop latch: !3 is branch_weights 100:1 in favour of the edge back to .b1.
390  br i1 %c2, label %.b1, label %.exit, !prof !3
391
392.exit:
393  ret i32 10
394}
395
396@g_const = constant i32 0, align 4
397
398;     b1
399;    /  \
400;   b2  b3
401;    \  /
402;     b4
403; preheader: 1000
404; b2: 0.5
405; b3: 999.5
406; Sink unordered atomic load to b2. Sinking an unordered atomic load into the
407; loop is allowed when it reads from a constant.
408; CHECK: t7
409; CHECK: .preheader:
410; CHECK-NOT:  load atomic i32, ptr @g_const unordered, align 4
411; CHECK: .b2:
412; CHECK: load atomic i32, ptr @g_const unordered, align 4
; Test input @t7: same diamond loop as @t6, but the unordered atomic load in
; .preheader reads @g_const, which is declared as a `constant` global.
; The only user of %invariant is %t1 in .b2.
; The two unnamed arguments are %0 and %1 and the unnamed entry block takes %2,
; which is why the first unnamed instruction is %3.
413define i32 @t7(i32, i32) #0 !prof !0 {
414  %3 = icmp eq i32 %1, 0
415  br i1 %3, label %.exit, label %.preheader
416
417.preheader:
  ; The load under test: atomic, unordered, from the constant @g_const.
418  %invariant = load atomic i32, ptr @g_const unordered, align 4
419  br label %.b1
420
421.b1:
  ; Loop header: %iv is 0 on entry from .preheader and %t3 on the edge from .b4.
422  %iv = phi i32 [ %t3, %.b4 ], [ 0, %.preheader ]
423  %c1 = icmp sgt i32 %iv, %0
  ; !1 is branch_weights 1:2000, so .b2 is the low-weight successor.
424  br i1 %c1, label %.b2, label %.b3, !prof !1
425
426.b2:
  ; Only user of %invariant.
427  %t1 = add nsw i32 %invariant, %iv
428  br label %.b4
429
430.b3:
431  %t2 = add nsw i32 %iv, 100
432  br label %.b4
433
434.b4:
435  %p1 = phi i32 [ %t2, %.b3 ], [ %t1, %.b2 ]
436  %t3 = add nuw nsw i32 %iv, 1
437  %c2 = icmp eq i32 %t3, %p1
  ; Loop latch: !3 is branch_weights 100:1 in favour of the edge back to .b1.
438  br i1 %c2, label %.b1, label %.exit, !prof !3
439
440.exit:
441  ret i32 10
442}
443
444declare i32 @foo()
445
446!0 = !{!"function_entry_count", i64 1}
447!1 = !{!"branch_weights", i32 1, i32 2000}
448!2 = !{!"branch_weights", i32 2000, i32 1}
449!3 = !{!"branch_weights", i32 100, i32 1}
450