xref: /llvm-project/llvm/test/Transforms/LoopRotate/update-branch-weights.ll (revision 896037c75ace929327e5b0bf5832157f9d81e6e7)
1; RUN: opt < %s -passes='print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_BEFORE
2; RUN: opt < %s -passes='loop(loop-rotate),print<block-freq>' -disable-output 2>&1 | FileCheck %s --check-prefixes=BFI_AFTER
3; RUN: opt < %s -passes='loop(loop-rotate)' -S | FileCheck %s --check-prefixes=IR
4
5@g = global i32 0
6
7; We should get the same "count =" results for "outer_loop_body" and
8; "inner_loop_body" before and after the transformation.
9
10; BFI_BEFORE-LABEL: block-frequency-info: func0
11; BFI_BEFORE: - entry: {{.*}} count = 1
12; BFI_BEFORE: - outer_loop_header: {{.*}} count = 1001
13; BFI_BEFORE: - outer_loop_body: {{.*}} count = 1000
14; BFI_BEFORE: - inner_loop_header: {{.*}} count = 4000
15; BFI_BEFORE: - inner_loop_body: {{.*}} count = 3000
16; BFI_BEFORE: - inner_loop_exit: {{.*}} count = 1000
17; BFI_BEFORE: - outer_loop_exit: {{.*}} count = 1
18
19; BFI_AFTER-LABEL: block-frequency-info: func0
20; BFI_AFTER: - entry: {{.*}} count = 1
21; BFI_AFTER: - outer_loop_body: {{.*}} count = 1000
22; BFI_AFTER: - inner_loop_body: {{.*}} count = 3000
23; BFI_AFTER: - inner_loop_exit: {{.*}} count = 1000
24; BFI_AFTER: - outer_loop_exit: {{.*}} count = 1
25
26; IR-LABEL: define void @func0
27; IR: inner_loop_body:
28; IR:   br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof [[PROF_FUNC0_0:![0-9]+]]
29; IR: inner_loop_exit:
30; IR:   br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof [[PROF_FUNC0_1:![0-9]+]]
31;
32; A function with known loop-bounds where after loop-rotation we end with an
33; unconditional branch in the pre-header.
34define void @func0() !prof !0 {  ; !0 sets the function entry count to 1
35entry:
36  br label %outer_loop_header
37
38outer_loop_header:
39  %i0 = phi i32 [0, %entry], [%i0_inc, %inner_loop_exit]  ; outer counter: 0 from entry, %i0_inc from inner_loop_exit
40  %cmp0 = icmp slt i32 %i0, 1000  ; constant outer bound: continue while %i0 < 1000
41  br i1 %cmp0, label %outer_loop_body, label %outer_loop_exit, !prof !1  ; !1 weights: 1000 to the body, 1 to the exit
42
43outer_loop_body:
44  store volatile i32 %i0, ptr @g, align 4  ; volatile store of the outer counter to global @g
45  br label %inner_loop_header
46
47inner_loop_header:
48  %i1 = phi i32 [0, %outer_loop_body], [%i1_inc, %inner_loop_body]  ; inner counter: restarts at 0 on every outer iteration
49  %cmp1 = icmp slt i32 %i1, 3  ; constant inner bound: continue while %i1 < 3
50  br i1 %cmp1, label %inner_loop_body, label %inner_loop_exit, !prof !2  ; !2 weights: 3000 to the body, 1000 to the exit
51
52inner_loop_body:
53  store volatile i32 %i1, ptr @g, align 4  ; volatile store of the inner counter to global @g
54  %i1_inc = add i32 %i1, 1
55  br label %inner_loop_header  ; inner back-edge
56
57inner_loop_exit:
58  %i0_inc = add i32 %i0, 1
59  br label %outer_loop_header  ; outer back-edge
60
61outer_loop_exit:
62  ret void
63}
64
65; BFI_BEFORE-LABEL: block-frequency-info: func1
66; BFI_BEFORE: - entry: {{.*}} count = 1024
67; BFI_BEFORE: - loop_header: {{.*}} count = 21504
68; BFI_BEFORE: - loop_body: {{.*}} count = 20480
69; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
70
71; BFI_AFTER-LABEL: block-frequency-info: func1
72; BFI_AFTER: - entry: {{.*}} count = 1024
73; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1016
74; BFI_AFTER: - loop_body: {{.*}} count = 20480
75; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1016
76; BFI_AFTER: - loop_exit: {{.*}} count = 1024
77
78; IR-LABEL: define void @func1
79; IR: entry:
80; IR:   br i1 %cmp1, label %loop_body.lr.ph, label %loop_exit, !prof [[PROF_FUNC1_0:![0-9]+]]
81
82; IR: loop_body:
83; IR:   br i1 %cmp, label %loop_body, label %loop_header.loop_exit_crit_edge, !prof [[PROF_FUNC1_1:![0-9]+]]
84
85; A function with unknown loop-bounds so loop-rotation ends up with a
86; conditional jump in pre-header and loop body. The branch weights show the
87; loop body is taken more often than the loop exit.
88define void @func1(i32 %n) !prof !3 {  ; %n is the loop bound; !3 sets the function entry count to 1024
89entry:
90  br label %loop_header
91
92loop_header:
93  %i = phi i32 [0, %entry], [%i_inc, %loop_body]  ; counter: 0 from entry, %i_inc from loop_body
94  %cmp = icmp slt i32 %i, %n  ; bound comes from the argument, so it is not a compile-time constant
95  br i1 %cmp, label %loop_body, label %loop_exit, !prof !4  ; !4 weights: 40 to the body, 2 to the exit
96
97loop_body:
98  store volatile i32 %i, ptr @g, align 4  ; volatile store of the counter to global @g
99  %i_inc = add i32 %i, 1
100  br label %loop_header  ; back-edge
101
102loop_exit:
103  ret void
104}
105
106; BFI_BEFORE-LABEL: block-frequency-info: func2
107; BFI_BEFORE: - entry: {{.*}} count = 1024
108; BFI_BEFORE: - loop_header: {{.*}} count = 1056
109; BFI_BEFORE: - loop_body: {{.*}} count = 32
110; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
111
112; BFI_AFTER-LABEL: block-frequency-info: func2
113; BFI_AFTER: - entry: {{.*}} count = 1024
114; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 32
115; BFI_AFTER: - loop_body: {{.*}} count = 32
116; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 32
117; BFI_AFTER: - loop_exit: {{.*}} count = 1024
118
119; IR-LABEL: define void @func2
120; IR: entry:
121; IR:   br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC2_0:![0-9]+]]
122
123; IR: loop_body:
124; IR:   br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC2_1:![0-9]+]]
125
126; A function with unknown loop-bounds so loop-rotation ends up with a
127; conditional jump in pre-header and loop body. Similar to `func1` but here
128; loop-exit count is higher than backedge count.
129define void @func2(i32 %n) !prof !3 {  ; %n is the loop bound; !3 sets the function entry count to 1024
130entry:
131  br label %loop_header
132
133loop_header:
134  %i = phi i32 [0, %entry], [%i_inc, %loop_body]  ; counter: 0 from entry, %i_inc from loop_body
135  %cmp = icmp slt i32 %i, %n
136  br i1 %cmp, label %loop_exit, label %loop_body, !prof !5  ; true edge leaves the loop here; !5 weights: 10240 to the exit, 320 to the body
137
138loop_body:
139  store volatile i32 %i, ptr @g, align 4  ; volatile store of the counter to global @g
140  %i_inc = add i32 %i, 1
141  br label %loop_header  ; back-edge
142
143loop_exit:
144  ret void
145}
146
147; BFI_BEFORE-LABEL: block-frequency-info: func3_zero_branch_weight
148; BFI_BEFORE: - entry: {{.*}} count = 1024
149; BFI_BEFORE: - loop_header: {{.*}} count = 2199023255552
150; BFI_BEFORE: - loop_body: {{.*}} count = 2199023254528
151; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
152
153; BFI_AFTER-LABEL: block-frequency-info: func3_zero_branch_weight
154; BFI_AFTER: - entry: {{.*}} count = 1024
155; BFI_AFTER: - loop_body.lr.ph: {{.*}} count = 1024
156; BFI_AFTER: - loop_body: {{.*}} count = 2199023255552
157; BFI_AFTER: - loop_header.loop_exit_crit_edge: {{.*}} count = 1024
158; BFI_AFTER: - loop_exit: {{.*}} count = 1024
159
160; IR-LABEL: define void @func3_zero_branch_weight
161; IR: entry:
162; IR:   br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC3_0:![0-9]+]]
163
164; IR: loop_body:
165; IR:   br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC3_0]]
166
167define void @func3_zero_branch_weight(i32 %n) !prof !3 {  ; same loop as func2; only the !prof node on the header branch differs
168entry:
169  br label %loop_header
170
171loop_header:
172  %i = phi i32 [0, %entry], [%i_inc, %loop_body]  ; counter: 0 from entry, %i_inc from loop_body
173  %cmp = icmp slt i32 %i, %n
174  br i1 %cmp, label %loop_exit, label %loop_body, !prof !6  ; !6 weights: 0 to the exit, 1 to the body
175
176loop_body:
177  store volatile i32 %i, ptr @g, align 4  ; volatile store of the counter to global @g
178  %i_inc = add i32 %i, 1
179  br label %loop_header  ; back-edge
180
181loop_exit:
182  ret void
183}
184
185; IR-LABEL: define void @func4_zero_branch_weight
186; IR: entry:
187; IR:   br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC4_0:![0-9]+]]
188
189; IR: loop_body:
190; IR:   br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC4_0]]
191
192define void @func4_zero_branch_weight(i32 %n) !prof !3 {  ; same loop as func2; only the !prof node on the header branch differs
193entry:
194  br label %loop_header
195
196loop_header:
197  %i = phi i32 [0, %entry], [%i_inc, %loop_body]  ; counter: 0 from entry, %i_inc from loop_body
198  %cmp = icmp slt i32 %i, %n
199  br i1 %cmp, label %loop_exit, label %loop_body, !prof !7  ; !7 weights: 1 to the exit, 0 to the body
200
201loop_body:
202  store volatile i32 %i, ptr @g, align 4  ; volatile store of the counter to global @g
203  %i_inc = add i32 %i, 1
204  br label %loop_header  ; back-edge
205
206loop_exit:
207  ret void
208}
209
210; IR-LABEL: define void @func5_zero_branch_weight
211; IR: entry:
212; IR:   br i1 %cmp1, label %loop_exit, label %loop_body.lr.ph, !prof [[PROF_FUNC5_0:![0-9]+]]
213
214; IR: loop_body:
215; IR:   br i1 %cmp, label %loop_header.loop_exit_crit_edge, label %loop_body, !prof [[PROF_FUNC5_0]]
216
217define void @func5_zero_branch_weight(i32 %n) !prof !3 {  ; same loop as func2; only the !prof node on the header branch differs
218entry:
219  br label %loop_header
220
221loop_header:
222  %i = phi i32 [0, %entry], [%i_inc, %loop_body]  ; counter: 0 from entry, %i_inc from loop_body
223  %cmp = icmp slt i32 %i, %n
224  br i1 %cmp, label %loop_exit, label %loop_body, !prof !8  ; !8 weights: 0 on both edges
225
226loop_body:
227  store volatile i32 %i, ptr @g, align 4  ; volatile store of the counter to global @g
228  %i_inc = add i32 %i, 1
229  br label %loop_header  ; back-edge
230
231loop_exit:
232  ret void
233}
234
235; BFI_BEFORE-LABEL: block-frequency-info: func6_inaccurate_branch_weight
236; BFI_BEFORE: - entry: {{.*}} count = 1024
237; BFI_BEFORE: - loop_header: {{.*}} count = 2047
238; BFI_BEFORE: - loop_body: {{.*}} count = 1023
239; BFI_BEFORE: - loop_exit: {{.*}} count = 1024
240
241; BFI_AFTER-LABEL: block-frequency-info: func6_inaccurate_branch_weight
242; BFI_AFTER: - entry: {{.*}} count = 1024
243; BFI_AFTER: - loop_body: {{.*}} count = 1024
244; BFI_AFTER: - loop_exit: {{.*}} count = 1024
245
246; IR-LABEL: define void @func6_inaccurate_branch_weight(
247; IR: entry:
248; IR:   br label %loop_body
249; IR: loop_body:
250; IR:   br i1 %cmp, label %loop_body, label %loop_exit, !prof [[PROF_FUNC6_0:![0-9]+]]
251; IR: loop_exit:
252; IR:   ret void
253
254; Branch weight from sample-based PGO may be inaccurate due to sampling.
255; Count for loop_body in the following case should be no less than that of
256; loop_exit. However, this may not hold for sample-based PGO.
257define void @func6_inaccurate_branch_weight() !prof !3 {  ; !3 sets the function entry count to 1024
258entry:
259  br label %loop_header
260
261loop_header:
262  %i = phi i32 [0, %entry], [%i_inc, %loop_body]  ; counter: 0 from entry, %i_inc from loop_body
263  %cmp = icmp slt i32 %i, 2  ; constant bound: continue while %i < 2
264  br i1 %cmp, label %loop_body, label %loop_exit, !prof !9  ; !9 weights: 1023 to the body, 1024 to the exit
265
266loop_body:
267  store volatile i32 %i, ptr @g, align 4  ; volatile store of the counter to global @g
268  %i_inc = add i32 %i, 1
269  br label %loop_header  ; back-edge
270
271loop_exit:
272  ret void
273}
274
275!0 = !{!"function_entry_count", i64 1}  ; entry count attached to func0
276!1 = !{!"branch_weights", i32 1000, i32 1}  ; func0 outer_loop_header branch: body, exit
277!2 = !{!"branch_weights", i32 3000, i32 1000}  ; func0 inner_loop_header branch: body, exit
278!3 = !{!"function_entry_count", i64 1024}  ; entry count shared by func1 through func6
279!4 = !{!"branch_weights", i32 40, i32 2}  ; func1 loop_header branch: body, exit
280!5 = !{!"branch_weights", i32 10240, i32 320}  ; func2 loop_header branch: exit, body
281!6 = !{!"branch_weights", i32 0, i32 1}  ; func3 loop_header branch: exit, body
282!7 = !{!"branch_weights", i32 1, i32 0}  ; func4 loop_header branch: exit, body
283!8 = !{!"branch_weights", i32 0, i32 0}  ; func5 loop_header branch: both weights zero
284!9 = !{!"branch_weights", i32 1023, i32 1024}  ; func6 loop_header branch: body, exit
285
286; IR: [[PROF_FUNC0_0]] = !{!"branch_weights", i32 2000, i32 1000}
287; IR: [[PROF_FUNC0_1]] = !{!"branch_weights", i32 999, i32 1}
288; IR: [[PROF_FUNC1_0]] = !{!"branch_weights", i32 127, i32 1}
289; IR: [[PROF_FUNC1_1]] = !{!"branch_weights", i32 2433, i32 127}
290; IR: [[PROF_FUNC2_0]] = !{!"branch_weights", i32 9920, i32 320}
291; IR: [[PROF_FUNC2_1]] = !{!"branch_weights", i32 320, i32 0}
292; IR: [[PROF_FUNC3_0]] = !{!"branch_weights", i32 0, i32 1}
293; IR: [[PROF_FUNC4_0]] = !{!"branch_weights", i32 1, i32 0}
294; IR: [[PROF_FUNC5_0]] = !{!"branch_weights", i32 0, i32 0}
295; IR: [[PROF_FUNC6_0]] = !{!"branch_weights", i32 0, i32 1024}
296