xref: /llvm-project/llvm/test/Transforms/SampleProfile/csspgo-inline.ll (revision 6bae5973c476e16dbbc82030d65c7859a6628e89)
1; Test for CSSPGO's new early inliner using priority queue
2
3; Note that the new pass manager is required to enable top-down processing in the sample profile loader.
4; Test that we inline the following in top-down order with the old inliner:
5;   main:3 @ _Z5funcAi
6;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
7;   _Z5funcBi:1 @ _Z8funcLeafi
8; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
9;
10; With the new FDO early inliner, the callee entry count is used to drive inlining instead of the callee's total samples, so we get less inlining for the given profile.
11; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW
12;
13; With the new FDO early inliner, the callee entry count is used to drive inlining instead of the callee's total samples; tuning the hot cutoff can get us the same inlining as the old inliner.
14; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
15;
16; With the new FDO early inliner, the callee entry count is used to drive inlining instead of the callee's total samples; tuning the cold sample profile inline threshold can get us the same inlining as the old inliner.
17; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
18;
19; With the new FDO early inliner and a tuned cutoff, we can control inlining through the size growth tuning knobs.
20; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1
21; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999900 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2
22
23
24; INLINE-BASE: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
25; INLINE-BASE: remark: merged.cpp:27:11: _Z8funcLeafi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 @ main:3:10
26; INLINE-BASE: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
27
28; INLINE-NEW: remark: merged.cpp:14:10: _Z5funcAi inlined into main to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
29; INLINE-NEW-NOT: remark
30
31; INLINE-NEW-LIMIT1-NOT: remark
32
33; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: _Z8funcLeafi inlined into _Z5funcAi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11
34; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: _Z8funcLeafi inlined into _Z5funcBi to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
35; INLINE-NEW-LIMIT2-NOT: remark
36
; Global i32 initialized to 3. _Z8funcLeafi reads it with volatile loads and
; passes the loaded value to _Z3fibi.
37@factor = dso_local global i32 3, align 4, !dbg !0
38
; main: loop driver for this test. Starting with %x.011 = 300000, each
; iteration calls _Z5funcBi(%x.011) and _Z5funcAi(%x.011 + 1), adds both
; results to the running sum, and decrements the counter. The loop exits after
; the iteration in which %x.011 is 0, and the sum is returned.
; Uses attribute group #0, which includes noinline.
; The _Z5funcAi call carries !dbg !28 (line 14, column 10); main's DISubprogram
; starts at line 11, which corresponds to the "main:3:10" callsite in the
; expected remarks at the top of this file.
39define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
40entry:
41  br label %for.body, !dbg !25
42
43for.cond.cleanup:                                 ; preds = %for.body
44  ret i32 %add3, !dbg !27
45
46for.body:                                         ; preds = %for.body, %entry
  ; %x.011 is the loop counter, %r.010 the running sum.
47  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
48  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
49  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
50  %add = add nuw nsw i32 %x.011, 1, !dbg !31
51  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
52  %add2 = add i32 %call, %r.010, !dbg !34
53  %add3 = add i32 %add2, %call1, !dbg !35
54  %dec = add nsw i32 %x.011, -1, !dbg !36
  ; The exit test uses the pre-decrement counter value.
55  %cmp = icmp eq i32 %x.011, 0, !dbg !38
56  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
57}
58
; _Z5funcAi: returns _Z8funcLeafi(%x + 100000).
; Uses attribute group #1, which does not include noinline.
; The _Z8funcLeafi call carries !dbg !45 (line 27, column 11); this function's
; DISubprogram starts at line 26, which corresponds to the "_Z5funcAi:1:11"
; callsite in the expected remarks at the top of this file.
59define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
60entry:
61  %add = add nsw i32 %x, 100000, !dbg !44
62  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
63  ret i32 %call, !dbg !46
64}
65
; _Z8funcLeafi: leaf callee reached from both _Z5funcAi and _Z5funcBi.
;   %x > 0: repeatedly subtract _Z3fibi(factor) until the value is <= 0.
;   %x < 0: repeatedly add _Z3fibi(factor) until the value is >= 0.
;   %x == 0: return 0.
; The value that ends the loop (or 0) is returned through the phi in if.end.
; Uses attribute group #1, which does not include noinline.
66define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
67entry:
68  %cmp = icmp sgt i32 %x, 0, !dbg !57
69  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59
70
71while.cond2.preheader:                            ; preds = %entry
  ; Reached only when %x <= 0; distinguishes the negative case from zero.
72  %cmp313 = icmp slt i32 %x, 0, !dbg !60
73  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63
74
75while.body:                                       ; preds = %while.body, %entry
  ; Positive case: @factor is re-read with a volatile load on every iteration.
76  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
77  %tmp = load volatile i32, i32* @factor, align 4, !dbg !64
78  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
79  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
80  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
81  br i1 %cmp1, label %while.body, label %if.end, !dbg !71
82
83while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
  ; Negative case: mirror of while.body, adding instead of subtracting.
84  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
85  %tmp1 = load volatile i32, i32* @factor, align 4, !dbg !72
86  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
87  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
88  %cmp3 = icmp slt i32 %add, 0, !dbg !60
89  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63
90
91if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
  ; Merge of the three paths: 0 when %x was 0, otherwise the final loop value.
92  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
93  ret i32 %x.addr.2, !dbg !76
94}
95
; _Z5funcBi: returns _Z8funcLeafi(%x - 100000).
; Uses attribute group #0, which includes noinline.
; The _Z8funcLeafi call carries !dbg !52 (line 33, column 11); this function's
; DISubprogram starts at line 32, which corresponds to the "_Z5funcBi:1:11"
; callsite in the expected remarks at the top of this file.
96define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
97entry:
98  %sub = add nsw i32 %x, -100000, !dbg !51
99  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
100  ret i32 %call, !dbg !53
101}
102
; External declaration only: _Z3fibi has no body in this module. It is called
; from both loops of _Z8funcLeafi.
103declare i32 @_Z3fibi(i32)
104
; Attribute groups. #0 is used by main and _Z5funcBi; #1 is used by _Z5funcAi
; and _Z8funcLeafi. The two groups are identical except that #0 also has
; noinline. Both carry "use-sample-profile".
105attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
106attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
107
; Named module metadata: the compile unit list (!2), the module flags
; (!14, !15, !16) and the producer ident string (!17).
108!llvm.dbg.cu = !{!2}
109!llvm.module.flags = !{!14, !15, !16}
110!llvm.ident = !{!17}
111
; Module-wide debug info:
;   !0, !1     - the "factor" global, typed as volatile int (!13 over !9).
;   !2         - the compile unit; note debugInfoForProfiling: true.
;   !3         - the source file, merged.cpp.
;   !5         - retainedTypes list holding !6, !10, !11, which are
;                DISubprogram entries for funcA/funcB/funcLeaf without
;                DISPFlagDefinition.
;   !7-!9      - the shared subroutine type: int taking int.
;   !14-!17    - values referenced by !llvm.module.flags and !llvm.ident.
112!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
113!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
114!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
115!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
116!4 = !{}
117!5 = !{!6, !10, !11}
118!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
119!7 = !DISubroutineType(types: !8)
120!8 = !{!9, !9}
121!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
122!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
123!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
124!12 = !{!0}
125!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
126!14 = !{i32 7, !"Dwarf Version", i32 4}
127!15 = !{i32 2, !"Debug Info Version", i32 3}
128!16 = !{i32 1, !"wchar_size", i32 4}
129!17 = !{!"clang version 11.0.0"}
; Debug info for main (!18, defined at line 11) with its locals r and x, plus
; the locations attached to main's instructions.
;   !28 (line 14, column 10) is on the _Z5funcAi call.
;   !32 (line 14, column 21, scope !33 with discriminator 2) is on the
;   _Z5funcBi call.
130!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
131!19 = !DISubroutineType(types: !20)
132!20 = !{!9}
133!21 = !{!22, !23}
134!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
135!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
136!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
137!25 = !DILocation(line: 13, column: 3, scope: !26)
138!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
139!27 = !DILocation(line: 17, column: 3, scope: !18)
140!28 = !DILocation(line: 14, column: 10, scope: !29)
141!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
142!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
143!31 = !DILocation(line: 14, column: 29, scope: !29)
144!32 = !DILocation(line: 14, column: 21, scope: !33)
145!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
146!34 = !DILocation(line: 14, column: 19, scope: !29)
147!35 = !DILocation(line: 14, column: 7, scope: !29)
148!36 = !DILocation(line: 13, column: 33, scope: !37)
149!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
150!38 = !DILocation(line: 13, column: 26, scope: !39)
151!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
; Debug info for _Z5funcAi (!40, defined at line 26) and _Z5funcBi (!47,
; defined at line 32). !45 (line 27, column 11) and !52 (line 33, column 11)
; are the locations of their respective calls to _Z8funcLeafi.
152!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
153!44 = !DILocation(line: 27, column: 22, scope: !40)
154!45 = !DILocation(line: 27, column: 11, scope: !40)
155!46 = !DILocation(line: 29, column: 3, scope: !40)
156!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
157!51 = !DILocation(line: 33, column: 22, scope: !47)
158!52 = !DILocation(line: 33, column: 11, scope: !47)
159!53 = !DILocation(line: 35, column: 3, scope: !47)
; Debug info for _Z8funcLeafi (!54, defined at line 48): lexical blocks and the
; locations attached to its compares, branches, volatile loads, _Z3fibi calls
; and return. !67 (line 52, column 12) and !74 (line 59, column 12) are the two
; _Z3fibi call sites.
160!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
161!57 = !DILocation(line: 49, column: 9, scope: !58)
162!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
163!59 = !DILocation(line: 49, column: 7, scope: !54)
164!60 = !DILocation(line: 58, column: 14, scope: !61)
165!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
166!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
167!63 = !DILocation(line: 58, column: 5, scope: !61)
168!64 = !DILocation(line: 52, column: 16, scope: !65)
169!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
170!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
171!67 = !DILocation(line: 52, column: 12, scope: !65)
172!68 = !DILocation(line: 52, column: 9, scope: !65)
173!69 = !DILocation(line: 51, column: 14, scope: !70)
174!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
175!71 = !DILocation(line: 51, column: 5, scope: !70)
176!72 = !DILocation(line: 59, column: 16, scope: !73)
177!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
178!74 = !DILocation(line: 59, column: 12, scope: !73)
179!75 = !DILocation(line: 59, column: 9, scope: !73)
180!76 = !DILocation(line: 63, column: 3, scope: !54)
181