; xref: /llvm-project/llvm/test/Transforms/SampleProfile/csspgo-inline.ll (revision 339b8a0019658746217870215abca77291ad75b3)
; Test CSSPGO's new early inliner, which uses a priority queue.

; Note that the new pass manager is needed to enable top-down processing in the sample profile loader.
; With the old inliner (-sample-profile-prioritized-inline=0), test that the following contexts are inlined in top-down order:
;   main:3 @ _Z5funcAi
;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
;   _Z5funcBi:1 @ _Z8funcLeafi
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE

; Same expectations after re-encoding the profile with `llvm-profdata merge --extbinary --use-md5`.
; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE

; Same expectations after re-encoding the profile as text with `--convert-sample-profile-layout=nest`.
; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest  %S/Inputs/profile-context-tracker.prof -o %t.prof
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE

; With the new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for the given profile.
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-prioritized-inline -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW
;
; With the new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples; tuning the hot cutoff can get us the same inlining.
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
;
; With the new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples; tuning the cold sample profile inline threshold can get us the same inlining.
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
;
; With the new FDO early inliner and a tuned cutoff, we can control inlining through the size-growth tuning knobs.
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2


; INLINE-BASE: remark: merged.cpp:14:10: '_Z5funcAi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
; INLINE-BASE: remark: merged.cpp:27:11: '_Z8funcLeafi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 @ main:3:10
; INLINE-BASE: remark: merged.cpp:33:11: '_Z8funcLeafi' inlined into '_Z5funcBi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11

; INLINE-NEW: remark: merged.cpp:14:10: '_Z5funcAi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
; INLINE-NEW-NOT: remark

; INLINE-NEW-LIMIT1-NOT: remark

; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: '_Z8funcLeafi' inlined into '_Z5funcBi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: '_Z8funcLeafi' inlined into '_Z5funcAi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11;
; INLINE-NEW-LIMIT2-NOT: remark
; Global i32 initialized to 3. _Z8funcLeafi reads it with volatile loads and passes the
; loaded value to _Z3fibi.
@factor = dso_local global i32 3, align 4, !dbg !0
; main: loop driver for the test.
; The induction value %x.011 starts at 300000 and is decremented by one per iteration; the
; loop exits after the iteration in which %x.011 is 0. Each iteration calls _Z5funcBi(x) and
; _Z5funcAi(x + 1) and adds both results to the accumulator %r.010. The final sum (%add3)
; is returned.
; The _Z5funcAi call carries !dbg !28 (merged.cpp:14:10), which is the location the expected
; inline remarks report as callsite main:3:10. main uses attribute group #0, which
; contains noinline.
define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
entry:
  br label %for.body, !dbg !25

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add3, !dbg !27

for.body:                                         ; preds = %for.body, %entry
  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
  %add = add nuw nsw i32 %x.011, 1, !dbg !31
  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
  %add2 = add i32 %call, %r.010, !dbg !34
  %add3 = add i32 %add2, %call1, !dbg !35
  %dec = add nsw i32 %x.011, -1, !dbg !36
  %cmp = icmp eq i32 %x.011, 0, !dbg !38
  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
}
; _Z5funcAi (funcA): returns _Z8funcLeafi(x + 100000).
; The call carries !dbg !45 (merged.cpp:27:11), reported in the expected remarks as
; callsite _Z5funcAi:1:11. Uses attribute group #1, which does not contain noinline.
define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
entry:
  %add = add nsw i32 %x, 100000, !dbg !44
  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
  ret i32 %call, !dbg !46
}
; _Z8funcLeafi (funcLeaf): drives x toward zero in steps of _Z3fibi(@factor).
;   x > 0: repeatedly subtract _Z3fibi(factor) while the running value stays positive,
;          then return the first non-positive value.
;   x < 0: repeatedly add _Z3fibi(factor) while the running value stays negative,
;          then return the first non-negative value.
;   x == 0: return 0 without calling _Z3fibi.
; @factor is re-read with a volatile load on every loop iteration.
; Uses attribute group #1, which does not contain noinline.
define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
entry:
  %cmp = icmp sgt i32 %x, 0, !dbg !57
  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59

while.cond2.preheader:                            ; preds = %entry
  %cmp313 = icmp slt i32 %x, 0, !dbg !60
  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63

while.body:                                       ; preds = %while.body, %entry
  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
  %tmp = load volatile i32, ptr @factor, align 4, !dbg !64
  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
  br i1 %cmp1, label %while.body, label %if.end, !dbg !71

while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
  %tmp1 = load volatile i32, ptr @factor, align 4, !dbg !72
  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
  %cmp3 = icmp slt i32 %add, 0, !dbg !60
  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63

if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
  ret i32 %x.addr.2, !dbg !76
}
; _Z5funcBi (funcB): returns _Z8funcLeafi(x - 100000); the subtraction is encoded as an
; add of -100000.
; The call carries !dbg !52 (merged.cpp:33:11), reported in the expected remarks as
; callsite _Z5funcBi:1:11. Uses attribute group #0, which contains noinline.
define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
entry:
  %sub = add nsw i32 %x, -100000, !dbg !51
  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
  ret i32 %call, !dbg !53
}
; External function called from _Z8funcLeafi; only a declaration is present in this module.
declare i32 @_Z3fibi(i32)

; Attribute group #0 is used by main and _Z5funcBi; #1 is used by _Z5funcAi and _Z8funcLeafi.
; The two groups are identical except that #0 additionally contains noinline.
; Both contain "use-sample-profile".
attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
; Module-level metadata and debug info for merged.cpp.
; The DISubprogram start lines (main: 11, funcA: 26, funcB: 32, funcLeaf: 48) and the
; DILocation line/column values below are what the merged.cpp:<line>:<col> locations and
; the <function>:<line offset>:<col> callsite strings in the expected remarks correspond to.
; For example, !28 is 14:10 inside main, which starts at line 11, giving main:3:10.
; Metadata IDs are not contiguous (e.g. !41-!43 are absent); nothing in this file
; references the missing IDs.
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!14, !15, !16}
!llvm.ident = !{!17}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
!4 = !{}
!5 = !{!6, !10, !11}
!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!12 = !{!0}
!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
!14 = !{i32 7, !"Dwarf Version", i32 4}
!15 = !{i32 2, !"Debug Info Version", i32 3}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{!"clang version 11.0.0"}
!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
!19 = !DISubroutineType(types: !20)
!20 = !{!9}
!21 = !{!22, !23}
!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
!25 = !DILocation(line: 13, column: 3, scope: !26)
!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
!27 = !DILocation(line: 17, column: 3, scope: !18)
!28 = !DILocation(line: 14, column: 10, scope: !29)
!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
!31 = !DILocation(line: 14, column: 29, scope: !29)
!32 = !DILocation(line: 14, column: 21, scope: !33)
!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
!34 = !DILocation(line: 14, column: 19, scope: !29)
!35 = !DILocation(line: 14, column: 7, scope: !29)
!36 = !DILocation(line: 13, column: 33, scope: !37)
!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
!38 = !DILocation(line: 13, column: 26, scope: !39)
!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!44 = !DILocation(line: 27, column: 22, scope: !40)
!45 = !DILocation(line: 27, column: 11, scope: !40)
!46 = !DILocation(line: 29, column: 3, scope: !40)
!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!51 = !DILocation(line: 33, column: 22, scope: !47)
!52 = !DILocation(line: 33, column: 11, scope: !47)
!53 = !DILocation(line: 35, column: 3, scope: !47)
!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!57 = !DILocation(line: 49, column: 9, scope: !58)
!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
!59 = !DILocation(line: 49, column: 7, scope: !54)
!60 = !DILocation(line: 58, column: 14, scope: !61)
!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
!63 = !DILocation(line: 58, column: 5, scope: !61)
!64 = !DILocation(line: 52, column: 16, scope: !65)
!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
!67 = !DILocation(line: 52, column: 12, scope: !65)
!68 = !DILocation(line: 52, column: 9, scope: !65)
!69 = !DILocation(line: 51, column: 14, scope: !70)
!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
!71 = !DILocation(line: 51, column: 5, scope: !70)
!72 = !DILocation(line: 59, column: 16, scope: !73)
!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
!74 = !DILocation(line: 59, column: 12, scope: !73)
!75 = !DILocation(line: 59, column: 9, scope: !73)
!76 = !DILocation(line: 63, column: 3, scope: !54)