xref: /llvm-project/llvm/test/Transforms/SampleProfile/profile-correlation-irreducible-loops.ll (revision e3cf80c5c1fe55efd8216575ccadea0ab087e79c)
1; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 | opt -passes='print<block-freq>' -disable-output  -use-iterative-bfi-inference 2>&1 | FileCheck %s
2; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-correlation-irreducible-loops.prof -sample-profile-use-profi=0 -S | FileCheck %s --check-prefix=CHECK2
3; RUN: opt < %s -passes='print<block-freq>' -use-iterative-bfi-inference -disable-output 2>&1 | FileCheck %s --check-prefix=CHECK3
4
5; The C code for this test case is from c-parse.c in 403.gcc (SPEC2006).
6; The problem with BFI for the test is solved by applying iterative inference.
7; The corresponding CFG is shown below, with intended counts for every
8; basic block. The hot loop, b3->b4->b2, is not getting proper (large) counts
9; unless the -use-iterative-bfi-inference option is specified.
10;
11;   +-------------------------------------------+
12;   |                                           |
13;   |                   +----------+            |
14;   |                   |  b1 [1]  |            |
15;   |                   +----------+            |
16;   |                     |                     |
17;   |                     |                     |
18;   |                     v                     |
19;   |                   +----------+            |
20;   |    +------------> | b2 [625] | -+         |
21;   |    |              +----------+  |         |
22;   |    |                |           |         |
23;   |    |                |           |         |
24;   |    |                v           |         |
25;   |  +----------+     +----------+  |         |
26;   |  | b4 [624] | <-- | b3 [625] | <+---------+
27;   |  +----------+     +----------+  |
28;   |                     |           |
29;   +----+                |           |
30;        |                v           v
31;      +----------+     +--------------------+
32;      |  b8 [1]  | <-- |       b7 [2]       |
33;      +----------+     +--------------------+
34;                         |           ^
35;                         |           |
36;                         v           |
37;      +----------+     +----------+  |
38;      |  b9 [1]  | <-- |  b5 [2]  |  |
39;      +----------+     +----------+  |
40;                         |           |
41;                         |           |
42;                         v           |
43;                       +----------+  |
44;                       |  b6 [1]  | -+
45;                       +----------+
46
47@yydebug = dso_local global i32 0, align 4 ; zero-initialized i32 loaded by both @yyparse_1 and @foo1
48
49; Function Attrs: noinline nounwind uwtable
; Implements the CFG drawn above. Every basic block carries one pseudo-probe
; whose index (second argument) equals the block number, and whose first
; argument is the GUID listed for "yyparse_1" in !llvm.pseudo_probe_desc.
; The edge b8 -> b3 re-enters the b2 -> b3 -> b4 cycle at b3, and b8 can be
; reached via b2 -> b7 -> b8 without passing through b3.
; NOTE(review): this appears to be the irreducible part the file header
; refers to -- confirm.
; The counts asserted after each block belong to the first RUN line; for the
; hot cycle they are 586/586/585 rather than the 625/625/624 shown in the
; diagram.
50define dso_local i32 @yyparse_1() #0 {
51b1:
52  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 1, i32 0, i64 -1)
53  %0 = load i32, ptr @yydebug, align 4
54  %cmp = icmp ne i32 %0, 0 ; computed once; every conditional branch below tests this same value
55  br label %b2
56; CHECK: - b1: float = {{.*}}, int = {{.*}}, count = 1
57
58b2:
59  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 2, i32 0, i64 -1)
60  br i1 %cmp, label %b7, label %b3 ; b3 continues the hot cycle, b7 leaves it
61; CHECK: - b2: float = {{.*}}, int = {{.*}}, count = 586
62
63b3:
64  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 3, i32 0, i64 -1)
65  br i1 %cmp, label %b7, label %b4 ; this branch's !prof weights are checked by the second RUN line
66; CHECK: - b3: float = {{.*}}, int = {{.*}}, count = 586
67; CHECK2: br i1 %cmp, label %b7, label %b4,
68; CHECK2-SAME: !prof ![[END172_PROF:[0-9]+]]
69
70b4:
71  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 4, i32 0, i64 -1)
72  br label %b2 ; closes the b2 -> b3 -> b4 cycle
73; CHECK: - b4: float = {{.*}}, int = {{.*}}, count = 585
74
75b5:
76  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 5, i32 0, i64 -1)
77  br i1 %cmp, label %b9, label %b6 ; b9 is the only block that returns
78; CHECK: - b5: float = {{.*}}, int = {{.*}}, count = 2
79
80b6:
81  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 6, i32 0, i64 -1)
82  br label %b7 ; closes the b7 -> b5 -> b6 cycle
83; CHECK: - b6: float = {{.*}}, int = {{.*}}, count = 1
84
85b7:
86  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 7, i32 0, i64 -1)
87  br i1 %cmp, label %b5, label %b8 ; this branch's !prof weights are checked by the second RUN line
88; CHECK: - b7: float = {{.*}}, int = {{.*}}, count = 2
89; CHECK2: br i1 %cmp, label %b5, label %b8,
90; CHECK2-SAME: !prof ![[FALSE4858_PROF:[0-9]+]]
91
92b8:
93  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 8, i32 0, i64 -1)
94  br label %b3 ; jumps back into the hot cycle at b3, not at b2
95; CHECK: - b8: float = {{.*}}, int = {{.*}}, count = 1
96
97b9:
98  call void @llvm.pseudoprobe(i64 -7702751003264189226, i64 9, i32 0, i64 -1)
99  %1 = load i32, ptr @yydebug, align 4
100  ret i32 %1 ; returns a fresh load of @yydebug
101; CHECK: - b9: float = {{.*}}, int = {{.*}}, count = 1
102
103}
104
105; Another difficult (for BFI) instance with irreducible loops,
106; containing 'indirectbr'. The corresponding CFG is shown below, with
107; intended counts for every basic block.
108;
109;      +-----------+
110;      |  b1 [1]   |
111;      +-----------+
112;        |
113;        |
114;        v
115;      +------------------------+
116;   +- |        b2 [86]         | <+
117;   |  +------------------------+  |
118;   |    |            |            |
119;   |    |            |            |
120;   |    v            |            |
121;   |  +-----------+  |            |
122;   |  | b3 [8212] | <+-------+    |
123;   |  +-----------+  |       |    |
124;   |    |            |       |    |
125;   |    |            |       |    |
126;   |    v            v       |    |
127;   |  +------------------------+  |
128;   |  |  indirectgoto [17747]  | -+
129;   |  +------------------------+
130;   |    |            ^  |
131;   |    |            +--+
132;   |    v
133;   |  +-----------+
134;   +> |  b4 [1]   |
135;      +-----------+
136
137; Function Attrs: nounwind uwtable
; Implements the CFG drawn above and is checked by the third RUN line, which
; runs only the block-frequency printer (no sample-profile pass). The profile
; input visible in this file is the entry count !132 (1) plus the branch
; weights !133 on the switch and !134 on the indirectbr. The asserted block
; counts are consistent with those weights: b2 = 1 + 85, b3 = 0 + 8212,
; b4 = 0 + 1, indirectgoto = 86 + 8212 + 9449.
138define dso_local i32 @foo1() #0 !prof !132 {
139b1:
140  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 1, i32 0, i64 -1)
141  %0 = load i32, ptr @yydebug, align 4
142  %cmp = icmp ne i32 %0, 0 ; not referenced by any later instruction in this function
143  br label %b2
144; CHECK3: - b1: float = {{.*}}, int = {{.*}}, count = 1
145
146b2:
147  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 2, i32 0, i64 -1)
148  %1 = load i32, ptr @yydebug, align 4
149  switch i32 %1, label %b4 [
150    i32 1, label %indirectgoto
151    i32 2, label %b3
152  ], !prof !133 ; weights are listed default-first: b4 = 0, indirectgoto = 86, b3 = 0
153; CHECK3: - b2: float = {{.*}}, int = {{.*}}, count = 86
154
155b3:
156  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 3, i32 0, i64 -1)
157  br label %indirectgoto
158; CHECK3: - b3: float = {{.*}}, int = {{.*}}, count = 8212
159
160b4:
161  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 4, i32 0, i64 -1)
162  %2 = load i32, ptr @yydebug, align 4
163  ret i32 %2 ; the only return in this function
164; CHECK3: - b4: float = {{.*}}, int = {{.*}}, count = 1
165
166indirectgoto:
167  %indirect.goto.dest = alloca i8, align 4 ; NOTE(review): branch address is a fresh alloca, not a blockaddress; presumably acceptable because the test only analyses the CFG -- confirm
168  call void @llvm.pseudoprobe(i64 7682762345278052905, i64 5, i32 0, i64 -1)
169  indirectbr ptr %indirect.goto.dest, [label %b2, label %indirectgoto, label %b4, label %b3], !prof !134 ; weights follow destination order: b2 = 85, indirectgoto = 9449 (self-edge), b4 = 1, b3 = 8212; they sum to 17747
170; CHECK3: - indirectgoto: float = {{.*}}, int = {{.*}}, count = 17747
171
172}
173
174declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1 ; called exactly once in every basic block of both functions
175
176attributes #0 = { noinline nounwind uwtable "use-sample-profile"} ; attached to @yyparse_1 and @foo1
177attributes #1 = { nounwind } ; attached to the @llvm.pseudoprobe declaration
178
179!llvm.pseudo_probe_desc = !{!1079, !4496} ; one descriptor per function defined in this file
180!1079 = !{i64 -7702751003264189226, i64 158496288380146391, !"yyparse_1", null} ; first field equals the GUID passed to every probe in @yyparse_1
181!4496 = !{i64 7682762345278052905, i64 404850113186107133, !"foo1", null} ; first field equals the GUID passed to every probe in @foo1
182!132 = !{!"function_entry_count", i64 1} ; !prof attachment on @foo1
183!133 = !{!"branch_weights", i32 0, i32 86, i32 0} ; attached to the switch in @foo1 block b2
184!134 = !{!"branch_weights", i32 85, i32 9449, i32 1, i32 8212} ; attached to the indirectbr in @foo1 block indirectgoto
185
186; CHECK2: ![[END172_PROF]] = !{!"branch_weights", i32 1, i32 1003}
187; CHECK2: ![[FALSE4858_PROF]] = !{!"branch_weights", i32 2, i32 1}
188