; Test for CSSPGO's new early inliner using priority queue

; Note that we need new pass manager to enable top-down processing for sample profile loader
; Test we inlined the following in top-down order with old inliner
;   main:3 @ _Z5funcAi
;   main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
;   _Z5funcBi:1 @ _Z8funcLeafi
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE

; Same expectations with the profile converted to extensible-binary format with MD5 names.
; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE

; Same expectations with the profile converted to the nested text layout.
; RUN: llvm-profdata merge --sample --text --convert-sample-profile-layout=nest %S/Inputs/profile-context-tracker.prof -o %t.prof
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE

; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof -sample-profile-prioritized-inline -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW
;
; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning hot cutoff can get us the same inlining
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
;
; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, tuning cold sample profile inline threshold can get us the same inlining
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-cold-inline-threshold=200 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
;
; With new FDO early inliner and tuned cutoff, we can control inlining through size growth tuning knob.
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -sample-profile-inline-limit-min=0 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --allow-empty --check-prefix=INLINE-NEW-LIMIT1
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-summary-cutoff-hot=999990 -sample-profile-inline-limit-min=10 -sample-profile-inline-growth-limit=1 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW-LIMIT2


; INLINE-BASE: remark: merged.cpp:14:10: '_Z5funcAi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
; INLINE-BASE: remark: merged.cpp:27:11: '_Z8funcLeafi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11 @ main:3:10
; INLINE-BASE: remark: merged.cpp:33:11: '_Z8funcLeafi' inlined into '_Z5funcBi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11

; INLINE-NEW: remark: merged.cpp:14:10: '_Z5funcAi' inlined into 'main' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite main:3:10
; INLINE-NEW-NOT: remark

; INLINE-NEW-LIMIT1-NOT: remark

; INLINE-NEW-LIMIT2: remark: merged.cpp:33:11: '_Z8funcLeafi' inlined into '_Z5funcBi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcBi:1:11
; INLINE-NEW-LIMIT2: remark: merged.cpp:27:11: '_Z8funcLeafi' inlined into '_Z5funcAi' to match profiling context with (cost={{[0-9]+}}, threshold={{[0-9]+}}) at callsite _Z5funcAi:1:11;
; INLINE-NEW-LIMIT2-NOT: remark

; NOTE(review): the callsite locations in the patterns above are consistent
; with <line offset from the enclosing DISubprogram's line>:<column> of the
; call's debug location below, e.g. the call to _Z5funcAi is at 14:10 (!28)
; and main starts at line 11 (!18), giving main:3:10. The external profile
; input is not visible here and presumably uses the same offsets, so keep the
; debug line and column numbers intact when editing this file.

@factor = dso_local global i32 3, align 4, !dbg !0

; main counts %x.011 down from 300000 and, on every iteration, calls
; _Z5funcBi(%x.011) and _Z5funcAi(%x.011 + 1), accumulating both results.
define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
entry:
  br label %for.body, !dbg !25

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add3, !dbg !27

for.body:                                         ; preds = %for.body, %entry
  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
  %add = add nuw nsw i32 %x.011, 1, !dbg !31
  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
  %add2 = add i32 %call, %r.010, !dbg !34
  %add3 = add i32 %add2, %call1, !dbg !35
  %dec = add nsw i32 %x.011, -1, !dbg !36
  %cmp = icmp eq i32 %x.011, 0, !dbg !38
  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
}

; _Z5funcAi forwards %x + 100000 to _Z8funcLeafi.
define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #1 !dbg !40 {
entry:
  %add = add nsw i32 %x, 100000, !dbg !44
  %call = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !45
  ret i32 %call, !dbg !46
}

; _Z8funcLeafi: for positive %x, repeatedly subtracts _Z3fibi(@factor) while
; the value stays positive; for negative %x, repeatedly adds it while the
; value stays negative; for zero, returns 0. @factor is loaded volatile.
define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
entry:
  %cmp = icmp sgt i32 %x, 0, !dbg !57
  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59

while.cond2.preheader:                            ; preds = %entry
  %cmp313 = icmp slt i32 %x, 0, !dbg !60
  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63

while.body:                                       ; preds = %while.body, %entry
  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
  %tmp = load volatile i32, ptr @factor, align 4, !dbg !64
  %call = tail call i32 @_Z3fibi(i32 %tmp), !dbg !67
  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
  br i1 %cmp1, label %while.body, label %if.end, !dbg !71

while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
  %tmp1 = load volatile i32, ptr @factor, align 4, !dbg !72
  %call5 = tail call i32 @_Z3fibi(i32 %tmp1), !dbg !74
  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
  %cmp3 = icmp slt i32 %add, 0, !dbg !60
  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63

if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
  ret i32 %x.addr.2, !dbg !76
}

; _Z5funcBi forwards %x - 100000 to _Z8funcLeafi.
define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
entry:
  %sub = add nsw i32 %x, -100000, !dbg !51
  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
  ret i32 %call, !dbg !53
}

declare i32 @_Z3fibi(i32)

; #0 (main, _Z5funcBi) differs from #1 (_Z5funcAi, _Z8funcLeafi) only by the
; additional noinline attribute.
attributes #0 = { nofree noinline norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
attributes #1 = { nofree norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!14, !15, !16}
!llvm.ident = !{!17}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
!4 = !{}
!5 = !{!6, !10, !11}
!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!12 = !{!0}
!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
!14 = !{i32 7, !"Dwarf Version", i32 4}
!15 = !{i32 2, !"Debug Info Version", i32 3}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{!"clang version 11.0.0"}
!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
!19 = !DISubroutineType(types: !20)
!20 = !{!9}
!21 = !{!22, !23}
!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
!25 = !DILocation(line: 13, column: 3, scope: !26)
!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
!27 = !DILocation(line: 17, column: 3, scope: !18)
!28 = !DILocation(line: 14, column: 10, scope: !29)
!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
!31 = !DILocation(line: 14, column: 29, scope: !29)
!32 = !DILocation(line: 14, column: 21, scope: !33)
!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
!34 = !DILocation(line: 14, column: 19, scope: !29)
!35 = !DILocation(line: 14, column: 7, scope: !29)
!36 = !DILocation(line: 13, column: 33, scope: !37)
!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
!38 = !DILocation(line: 13, column: 26, scope: !39)
!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!44 = !DILocation(line: 27, column: 22, scope: !40)
!45 = !DILocation(line: 27, column: 11, scope: !40)
!46 = !DILocation(line: 29, column: 3, scope: !40)
!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!51 = !DILocation(line: 33, column: 22, scope: !47)
!52 = !DILocation(line: 33, column: 11, scope: !47)
!53 = !DILocation(line: 35, column: 3, scope: !47)
!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!57 = !DILocation(line: 49, column: 9, scope: !58)
!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
!59 = !DILocation(line: 49, column: 7, scope: !54)
!60 = !DILocation(line: 58, column: 14, scope: !61)
!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
!63 = !DILocation(line: 58, column: 5, scope: !61)
!64 = !DILocation(line: 52, column: 16, scope: !65)
!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
!67 = !DILocation(line: 52, column: 12, scope: !65)
!68 = !DILocation(line: 52, column: 9, scope: !65)
!69 = !DILocation(line: 51, column: 14, scope: !70)
!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
!71 = !DILocation(line: 51, column: 5, scope: !70)
!72 = !DILocation(line: 59, column: 16, scope: !73)
!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
!74 = !DILocation(line: 59, column: 12, scope: !73)
!75 = !DILocation(line: 59, column: 9, scope: !73)
!76 = !DILocation(line: 63, column: 3, scope: !54)