xref: /llvm-project/llvm/test/Transforms/SampleProfile/profile-context-tracker.ll (revision 45b155924e1662dc69883d14149908434f77094f)
1; Test for CSSPGO's SampleContextTracker to make sure context profile tree is promoted and merged properly
2; based on inline decision, so post inline counts are accurate.
3
4; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/profile-context-tracker.prof -o %t
5
6; Note that we need new pass manager to enable top-down processing for sample profile loader
7; Test that we inlined the following in top-down order and that entry counts accurately reflect the post-inline base profile
8;   main:3 @ _Z5funcAi
9;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
10;   _Z5funcBi:1 @ _Z8funcLeafi
11; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
12; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
13; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
14; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -sample-profile-even-flow-distribution=0 -S | FileCheck %s --check-prefix=INLINE-ALL
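15; NOTE(review): the expectations listed next appear to belong to the INLINE-HOT check prefix, but no RUN line visible in this file selects that prefix, so those checks are currently not exercised - confirm whether the RUN lines were dropped intentionally.
16; Test that we inlined the following in top-down order and that entry counts accurately reflect the post-inline base profile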
17;   _Z5funcAi:1 @ _Z8funcLeafi
18;   _Z5funcBi:1 @ _Z8funcLeafi
19
20; Test that the functions are not inlined as a result of the sample profile when `disable-sample-loader-inlining` is set.
21;
22; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -disable-sample-loader-inlining -S | FileCheck %s --check-prefix=INLINE-NONE
23; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -disable-sample-loader-inlining -S | FileCheck %s --check-prefix=INLINE-NONE
24; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -disable-sample-loader-inlining -S | FileCheck %s --check-prefix=INLINE-NONE
25; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -disable-sample-loader-inlining -S | FileCheck %s --check-prefix=INLINE-NONE
26
27
; Global i32 initialized to 3; _Z8funcLeafi reads it with volatile loads and passes the value to _Z3fibi.
28@factor = dso_local global i32 3, align 4, !dbg !0
29
30define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
; Test driver: loops with %x.011 counting down from 300000 to 0; each iteration
; calls _Z5funcBi(%x.011) and _Z5funcAi(%x.011 + 1) and adds both results to the
; running sum, which is returned once the loop exits.
31; INLINE-ALL: @main{{.*}}!prof ![[MAIN_PROF:[0-9]+]]
32; INLINE-HOT: @main{{.*}}!prof ![[MAIN_PROF:[0-9]+]]
33; INLINE-NONE: @main{{.*}}!prof ![[MAIN_PROF:[0-9]+]]
34entry:
35  br label %for.body, !dbg !25
36
37for.cond.cleanup:                                 ; preds = %for.body
38  ret i32 %add3, !dbg !27
39
40for.body:                                         ; preds = %for.body, %entry
41  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
42  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
43  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
44; _Z5funcBi carries the noinline attribute (attribute group #0), so its call must remain
45; INLINE-ALL: call i32 @_Z5funcBi
46; INLINE-HOT: call i32 @_Z5funcBi
47;
48; _Z5funcBi is not inlined either when `disable-sample-loader-inlining` is true.
49; INLINE-NONE:  call i32 @_Z5funcBi
50  %add = add nuw nsw i32 %x.011, 1, !dbg !31
51  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
; The INLINE-ALL configuration expects the _Z5funcAi call to be gone (inlined into
; main), whereas the INLINE-HOT expectations keep the call.
52; INLINE-ALL-NOT: call i32 @_Z5funcAi
53; INLINE-HOT: call i32 @_Z5funcAi
54;
55; _Z5funcAi is not inlined since `disable-sample-loader-inlining` is true.
56; INLINE-NONE: call i32 @_Z5funcAi
57  %add2 = add i32 %call, %r.010, !dbg !34
58  %add3 = add i32 %add2, %call1, !dbg !35
59  %dec = add nsw i32 %x.011, -1, !dbg !36
60  %cmp = icmp eq i32 %x.011, 0, !dbg !38
61  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
62}
63
64define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
; Adds 100000 to %x and returns the result of _Z8funcLeafi on that value.
65; Under the INLINE-ALL configuration _Z5funcAi is inlined into main, so the outlined remainder should have a zero entry count
66; INLINE-ALL: @_Z5funcAi{{.*}}!prof ![[FUNCA_PROF:[0-9]+]]
67; INLINE-HOT: @_Z5funcAi{{.*}}!prof ![[FUNCA_PROF:[0-9]+]]
68; INLINE-NONE: @_Z5funcAi{{.*}}!prof ![[FUNCA_PROF:[0-9]+]]
69entry:
70  %add = add nsw i32 %x, 100000, !dbg !44
71; _Z8funcLeafi is already inlined on main->_Z5funcAi->_Z8funcLeafi,
72; so it should not be inlined again on _Z5funcAi->_Z8funcLeafi based on the
73; updated (merged and promoted) context profile
74; INLINE-ALL: call i32 @_Z8funcLeafi
75; INLINE-HOT-NOT: call i32 @_Z8funcLeafi
76;
77; `_Z8funcLeafi` isn't inlined if `disable-sample-loader-inlining` is true.
78; INLINE-NONE: call i32 @_Z8funcLeafi
79  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
80  ret i32 %call, !dbg !46
81}
82
83define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
; Leaf worker: for %x > 0 it repeatedly subtracts _Z3fibi(@factor) until the value is
; no longer positive; for %x < 0 it repeatedly adds _Z3fibi(@factor) until the value is
; no longer negative; for %x == 0 it returns 0. @factor is re-read with a volatile
; load on every iteration.
84; main->_Z5funcAi->_Z8funcLeafi is inlined, and _Z5funcBi->_Z8funcLeafi is also
85; inlined, so the outlined remainder should have an empty profile
86; INLINE-ALL: @_Z8funcLeafi{{.*}}!prof ![[LEAF_PROF:[0-9]+]]
87; INLINE-HOT: @_Z8funcLeafi{{.*}}!prof ![[LEAF_PROF:[0-9]+]]
88;
89; _Z8funcLeafi won't be inlined if `disable-sample-loader-inlining` is true.
90; INLINE-NONE: @_Z8funcLeafi{{.*}}!prof ![[LEAF_PROF:[0-9]+]]
91entry:
92  %cmp = icmp sgt i32 %x, 0, !dbg !57
93  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59
94
95while.cond2.preheader:                            ; preds = %entry
96  %cmp313 = icmp slt i32 %x, 0, !dbg !60
97  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63
98
99while.body:                                       ; preds = %while.body, %entry
100  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
101  %tmp = load volatile i32, ptr @factor, align 4, !dbg !64
102  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
103  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
104  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
105  br i1 %cmp1, label %while.body, label %if.end, !dbg !71
106
107while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
108  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
109  %tmp1 = load volatile i32, ptr @factor, align 4, !dbg !72
110  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
111  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
112  %cmp3 = icmp slt i32 %add, 0, !dbg !60
113  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63
114
115if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
116  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
117  ret i32 %x.addr.2, !dbg !76
118}
119
120define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
; Subtracts 100000 from %x (expressed as an add of -100000) and returns the result
; of _Z8funcLeafi on that value.
121; _Z5funcBi is marked noinline, so the outlined function keeps the promoted context profile
122; INLINE-ALL: @_Z5funcBi{{.*}}!prof ![[FUNCB_PROF:[0-9]+]]
123; INLINE-HOT: @_Z5funcBi{{.*}}!prof ![[FUNCB_PROF:[0-9]+]]
124; _Z5funcBi won't be inlined since `disable-sample-loader-inlining` is true.
125; INLINE-NONE: @_Z5funcBi{{.*}}!prof ![[FUNCB_PROF:[0-9]+]]
126entry:
127  %sub = add nsw i32 %x, -100000, !dbg !51
128  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
129; _Z5funcBi is not inlined into main, so the main->_Z5funcBi->_Z8funcLeafi call
130; should be inlined here based on the promoted context profile
131; INLINE-ALL-NOT: call i32 @_Z8funcLeafi
132; INLINE-HOT-NOT: call i32 @_Z8funcLeafi
133;
134; INLINE-NONE: call i32 @_Z8funcLeafi
135  ret i32 %call, !dbg !53
136}
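137; NOTE(review): FileCheck documents no `-DAG-SAME` directive suffix, so the `*-DAG-SAME` lines for LEAF_PROF below are presumably not recognized as checks - confirm the intended directive.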
138; INLINE-ALL-DAG: [[MAIN_PROF]] = !{!"function_entry_count", i64 1}
139; INLINE-ALL-DAG: [[FUNCA_PROF]] = !{!"function_entry_count", i64 0}
140; INLINE-ALL-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 0}
141; INLINE-ALL-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 13}
142
143; INLINE-HOT-DAG: [[MAIN_PROF]] = !{!"function_entry_count", i64 1}
144; INLINE-HOT-DAG: [[FUNCA_PROF]] = !{!"function_entry_count", i64 12}
145; INLINE-HOT-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 0}
146; INLINE-HOT-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 13}
147
148; INLINE-NONE: [[MAIN_PROF]] = !{!"function_entry_count", i64 14}
149; INLINE-NONE: [[FUNCA_PROF]] = !{!"function_entry_count", i64 24}
150; INLINE-NONE-DAG-SAME: [[LEAF_PROF]] = !{!"function_entry_count", i64 21}
151; INLINE-NONE-DAG: [[FUNCB_PROF]] = !{!"function_entry_count", i64 32}
152
; _Z3fibi is only declared in this module; _Z8funcLeafi is its only caller here.
153declare i32 @_Z3fibi(i32)
154
; Attribute group #0 (used by @main and @_Z5funcBi) is identical to #1 (used by
; @_Z5funcAi and @_Z8funcLeafi) except that #0 additionally carries noinline.
; Both groups include "use-sample-profile".
155attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
156attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
157
; Module-level named metadata: the debug compile unit, the module flags, and the producer ident.
158!llvm.dbg.cu = !{!2}
159!llvm.module.flags = !{!14, !15, !16}
160!llvm.ident = !{!17}
161
; Debug info for @factor (typed as volatile int via !13), the compile unit (built with
; debugInfoForProfiling: true), the retained declarations of funcA/funcB/funcLeaf, the
; shared int(int) subroutine type, and the payloads of the module flags and ident.
162!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
163!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
164!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
165!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
166!4 = !{}
167!5 = !{!6, !10, !11}
168!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
169!7 = !DISubroutineType(types: !8)
170!8 = !{!9, !9}
171!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
172!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
173!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
174!12 = !{!0}
175!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
176!14 = !{i32 7, !"Dwarf Version", i32 4}
177!15 = !{i32 2, !"Debug Info Version", i32 3}
178!16 = !{i32 1, !"wchar_size", i32 4}
179!17 = !{!"clang version 11.0.0"}
; Debug info for @main (subprogram at line 11) and the locations used in its body.
; The _Z5funcAi call site (!28) is on line 14, i.e. 3 lines after the subprogram line,
; which is consistent with the `main:3 @ _Z5funcAi` context named in the header comments.
; The _Z5funcBi call site (!32) is on the same line but carries discriminator 2.
180!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
181!19 = !DISubroutineType(types: !20)
182!20 = !{!9}
183!21 = !{!22, !23}
184!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
185!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
186!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
187!25 = !DILocation(line: 13, column: 3, scope: !26)
188!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
189!27 = !DILocation(line: 17, column: 3, scope: !18)
190!28 = !DILocation(line: 14, column: 10, scope: !29)
191!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
192!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
193!31 = !DILocation(line: 14, column: 29, scope: !29)
194!32 = !DILocation(line: 14, column: 21, scope: !33)
195!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
196!34 = !DILocation(line: 14, column: 19, scope: !29)
197!35 = !DILocation(line: 14, column: 7, scope: !29)
198!36 = !DILocation(line: 13, column: 33, scope: !37)
199!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
200!38 = !DILocation(line: 13, column: 26, scope: !39)
201!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
; Debug info for @_Z5funcAi (subprogram at line 26) and @_Z5funcBi (subprogram at line 32).
; In both functions the _Z8funcLeafi call site (!45 and !52) is 1 line after the subprogram
; line, consistent with the `_Z5funcAi:1` and `_Z5funcBi:1` contexts in the header comments.
202!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
203!44 = !DILocation(line: 27, column: 22, scope: !40)
204!45 = !DILocation(line: 27, column: 11, scope: !40)
205!46 = !DILocation(line: 29, column: 3, scope: !40)
206!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
207!51 = !DILocation(line: 33, column: 22, scope: !47)
208!52 = !DILocation(line: 33, column: 11, scope: !47)
209!53 = !DILocation(line: 35, column: 3, scope: !47)
; Debug info for @_Z8funcLeafi (subprogram at line 48): lexical blocks and locations for
; the positive-branch loop (lines 51-52), the negative-branch loop (lines 58-59), and the
; final return (line 63).
210!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
211!57 = !DILocation(line: 49, column: 9, scope: !58)
212!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
213!59 = !DILocation(line: 49, column: 7, scope: !54)
214!60 = !DILocation(line: 58, column: 14, scope: !61)
215!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
216!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
217!63 = !DILocation(line: 58, column: 5, scope: !61)
218!64 = !DILocation(line: 52, column: 16, scope: !65)
219!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
220!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
221!67 = !DILocation(line: 52, column: 12, scope: !65)
222!68 = !DILocation(line: 52, column: 9, scope: !65)
223!69 = !DILocation(line: 51, column: 14, scope: !70)
224!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
225!71 = !DILocation(line: 51, column: 5, scope: !70)
226!72 = !DILocation(line: 59, column: 16, scope: !73)
227!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
228!74 = !DILocation(line: 59, column: 12, scope: !73)
229!75 = !DILocation(line: 59, column: 9, scope: !73)
230!76 = !DILocation(line: 63, column: 3, scope: !54)
231