;; Test for different function processing orders affecting inlining in sample profile loader.

;; There is an SCC _Z5funcAi -> _Z8funcLeafi -> _Z5funcAi in the program.
;; With -use-profiled-call-graph=0, the top-down processing order of
;; that SCC is (_Z8funcLeafi, _Z5funcAi), which is determined based on
;; the static call graph. With -use-profiled-call-graph=1, call edges
;; from the profile are considered, thus the order becomes (_Z5funcAi, _Z8funcLeafi),
;; which leads to _Z8funcLeafi being inlined into _Z5funcAi.
; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=INLINE
; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=0 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=NOINLINE

;; There is an indirect call _Z5funcAi -> _Z3fibi in the program.
;; With -use-profiled-call-graph=0, the processing order computed
;; based on the static call graph is (_Z3fibi, _Z5funcAi). With
;; -use-profiled-call-graph=1, the indirect call edge from the profile is
;; considered, thus the order becomes (_Z5funcAi, _Z3fibi), which leads to
;; _Z3fibi being inlined into _Z5funcAi.
; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=1 -sample-profile-file=%S/Inputs/profile-context-order.prof -S | FileCheck %s -check-prefix=ICALL-INLINE

;; When a cycle is formed by profiled edges between _Z5funcBi and _Z8funcLeafi,
;; the function processing order matters. Without considering call edge weights,
;; _Z8funcLeafi can be processed before _Z5funcBi, which leads to suboptimal
;; inlining.
; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=1 -sort-profiled-scc-member=0 -sample-profile-file=%S/Inputs/profile-context-order-scc.prof -S | FileCheck %s -check-prefix=NOINLINEB
; RUN: opt < %s -passes=sample-profile -use-profiled-call-graph=1 -sort-profiled-scc-member=1 -sample-profile-file=%S/Inputs/profile-context-order-scc.prof -S | FileCheck %s -check-prefix=INLINEB


;; @factor is read with volatile loads inside _Z8funcLeafi.
;; @fp is the function pointer that _Z5funcAi calls through; main stores
;; @_Z3fibi into it before entering its loop.
@factor = dso_local global i32 3, align 4, !dbg !0
@fp = dso_local global ptr null, align 8

;; FileCheck expectations, one group per check prefix. Each group first anchors
;; on a caller's `define` line and then requires (or forbids) a direct call
;; after it. The two groups anchored on _Z5funcBi are written here even though
;; _Z5funcBi is defined further down in this file.
; INLINE: define dso_local i32 @_Z5funcAi
; INLINE-NOT: call i32 @_Z8funcLeafi
; NOINLINE: define dso_local i32 @_Z5funcAi
; NOINLINE: call i32 @_Z8funcLeafi
; ICALL-INLINE: define dso_local i32 @_Z5funcAi
; ICALL-INLINE: call i32 @_Z3foo
; INLINEB: define dso_local i32 @_Z5funcBi
; INLINEB-NOT: call i32 @_Z8funcLeafi
; NOINLINEB: define dso_local i32 @_Z5funcBi
; NOINLINEB: call i32 @_Z8funcLeafi
;; _Z5funcAi: calls through @fp with the constant 8, then calls _Z8funcLeafi
;; with %x + 100000. Only the indirect call's result is returned; the result
;; of the _Z8funcLeafi call is unused.
define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 {
entry:
  %add = add nsw i32 %x, 100000, !dbg !44
  ;; Indirect call site: the callee is whatever @fp currently holds.
  %0 = load ptr, ptr @fp, align 8
  %call = call i32 %0(i32 8), !dbg !45
  ;; Direct call site that the INLINE / NOINLINE groups above look for.
  %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46
  ret i32 %call, !dbg !46
}

;; main: points @fp at _Z3fibi, then loops with %x.011 counting down from
;; 300000, calling _Z5funcBi(%x.011) and _Z5funcAi(%x.011 + 1) on every
;; iteration and accumulating both results into the returned sum.
define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 {
entry:
  store ptr @_Z3fibi, ptr @fp, align 8, !dbg !25
  br label %for.body, !dbg !25

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add3, !dbg !27

for.body:                                         ; preds = %for.body, %entry
  %x.011 = phi i32 [ 300000, %entry ], [ %dec, %for.body ]
  %r.010 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
  %call = tail call i32 @_Z5funcBi(i32 %x.011), !dbg !32
  %add = add nuw nsw i32 %x.011, 1, !dbg !31
  %call1 = tail call i32 @_Z5funcAi(i32 %add), !dbg !28
  %add2 = add i32 %call, %r.010, !dbg !34
  %add3 = add i32 %add2, %call1, !dbg !35
  %dec = add nsw i32 %x.011, -1, !dbg !36
  ;; The exit test uses the pre-decrement value, so the iteration with
  ;; %x.011 == 0 still executes both calls before leaving the loop.
  %cmp = icmp eq i32 %x.011, 0, !dbg !38
  br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25
}

;; These `define` matches close the search range of the preceding directive in
;; each group (for example, the forbidden/required call after _Z5funcAi must
;; be found before this definition).
;; NOTE(review): no RUN line visible in this file passes the ICALL-NOINLINE
;; prefix to FileCheck, so the last directive below is never checked. Confirm
;; whether a matching RUN line was intended.
; INLINE: define dso_local i32 @_Z8funcLeafi
; NOINLINE: define dso_local i32 @_Z8funcLeafi
; ICALL-INLINE: define dso_local i32 @_Z8funcLeafi
; ICALL-NOINLINE: define dso_local i32 @_Z8funcLeafi
;; _Z8funcLeafi: for %x > 0, repeatedly subtracts _Z5funcAi(@factor) until the
;; value is no longer positive; for %x < 0, repeatedly adds _Z5funcBi(@factor)
;; until the value is no longer negative; for %x == 0, returns 0. The calls
;; back into _Z5funcAi and _Z5funcBi close the static call-graph cycles with
;; those two functions.
define dso_local i32 @_Z8funcLeafi(i32 %x) local_unnamed_addr #1 !dbg !54 {
entry:
  %cmp = icmp sgt i32 %x, 0, !dbg !57
  br i1 %cmp, label %while.body, label %while.cond2.preheader, !dbg !59

while.cond2.preheader:                            ; preds = %entry
  %cmp313 = icmp slt i32 %x, 0, !dbg !60
  br i1 %cmp313, label %while.body4, label %if.end, !dbg !63

while.body:                                       ; preds = %while.body, %entry
  %x.addr.016 = phi i32 [ %sub, %while.body ], [ %x, %entry ]
  ;; Volatile load: @factor is re-read on every iteration.
  %tmp = load volatile i32, ptr @factor, align 4, !dbg !64
  %call = tail call i32 @_Z5funcAi(i32 %tmp), !dbg !67
  %sub = sub nsw i32 %x.addr.016, %call, !dbg !68
  %cmp1 = icmp sgt i32 %sub, 0, !dbg !69
  br i1 %cmp1, label %while.body, label %if.end, !dbg !71

while.body4:                                      ; preds = %while.body4, %while.cond2.preheader
  %x.addr.114 = phi i32 [ %add, %while.body4 ], [ %x, %while.cond2.preheader ]
  %tmp1 = load volatile i32, ptr @factor, align 4, !dbg !72
  %call5 = tail call i32 @_Z5funcBi(i32 %tmp1), !dbg !74
  %add = add nsw i32 %call5, %x.addr.114, !dbg !75
  %cmp3 = icmp slt i32 %add, 0, !dbg !60
  br i1 %cmp3, label %while.body4, label %if.end, !dbg !63

if.end:                                           ; preds = %while.body4, %while.body, %while.cond2.preheader
  %x.addr.2 = phi i32 [ 0, %while.cond2.preheader ], [ %sub, %while.body ], [ %add, %while.body4 ]
  ret i32 %x.addr.2, !dbg !76
}

;; _Z5funcBi: returns _Z8funcLeafi(%x - 100000). This is the direct call site
;; that the INLINEB / NOINLINEB groups look for.
define dso_local i32 @_Z5funcBi(i32 %x) local_unnamed_addr #0 !dbg !47 {
entry:
  %sub = add nsw i32 %x, -100000, !dbg !51
  %call = tail call i32 @_Z8funcLeafi(i32 %sub), !dbg !52
  ret i32 %call, !dbg !53
}

;; _Z3fibi: the function main stores into @fp. It calls the external _Z3foo
;; with %x - 100000, discards that result, and returns %x - 100000 itself.
;; The ICALL-INLINE group requires a `call i32 @_Z3foo` between the `define`
;; lines of _Z5funcAi and _Z8funcLeafi; the only call to _Z3foo in this file
;; is the one below.
define dso_local i32 @_Z3fibi(i32 %x) local_unnamed_addr #1 !dbg !77 {
entry:
  %sub = add nsw i32 %x, -100000, !dbg !78
  %call = tail call i32 @_Z3foo(i32 %sub), !dbg !78
  ret i32 %sub, !dbg !78
}

declare i32 @_Z3foo(i32)

;; #0 (on _Z5funcAi, main, _Z5funcBi) carries noinline; #1 (on _Z8funcLeafi,
;; _Z3fibi) is the same list without noinline. Both carry "use-sample-profile".
attributes #0 = { nofree noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }
attributes #1 = { nofree nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" "use-sample-profile" }

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!14, !15, !16}
!llvm.ident = !{!17}

;; Debug-info metadata. The compile unit !2 sets debugInfoForProfiling: true.
;; NOTE(review): the DILocation line/discriminator values below presumably
;; have to stay in sync with the .prof inputs named on the RUN lines; do not
;; renumber them without regenerating those files -- TODO confirm.
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "factor", scope: !2, file: !3, line: 21, type: !13, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 11.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !5, globals: !12, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!3 = !DIFile(filename: "merged.cpp", directory: "/local/autofdo")
!4 = !{}
!5 = !{!6, !10, !11}
;; !6, !10 and !11 are the non-distinct (declaration) subprograms listed in
;; retainedTypes; the distinct definitions appear later as !40, !47 and !54.
!6 = !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 6, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9}
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!10 = !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 7, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!11 = !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 22, type: !7, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !4)
!12 = !{!0}
!13 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !9)
!14 = !{i32 7, !"Dwarf Version", i32 4}
!15 = !{i32 2, !"Debug Info Version", i32 3}
!16 = !{i32 1, !"wchar_size", i32 4}
!17 = !{!"clang version 11.0.0"}
;; Scope and locations for main and its loop.
!18 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !19, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
!19 = !DISubroutineType(types: !20)
!20 = !{!9}
!21 = !{!22, !23}
!22 = !DILocalVariable(name: "r", scope: !18, file: !3, line: 12, type: !9)
!23 = !DILocalVariable(name: "x", scope: !24, file: !3, line: 13, type: !9)
!24 = distinct !DILexicalBlock(scope: !18, file: !3, line: 13, column: 3)
!25 = !DILocation(line: 13, column: 3, scope: !26)
!26 = !DILexicalBlockFile(scope: !24, file: !3, discriminator: 2)
!27 = !DILocation(line: 17, column: 3, scope: !18)
!28 = !DILocation(line: 14, column: 10, scope: !29)
!29 = distinct !DILexicalBlock(scope: !30, file: !3, line: 13, column: 37)
!30 = distinct !DILexicalBlock(scope: !24, file: !3, line: 13, column: 3)
!31 = !DILocation(line: 14, column: 29, scope: !29)
!32 = !DILocation(line: 14, column: 21, scope: !33)
!33 = !DILexicalBlockFile(scope: !29, file: !3, discriminator: 2)
!34 = !DILocation(line: 14, column: 19, scope: !29)
!35 = !DILocation(line: 14, column: 7, scope: !29)
!36 = !DILocation(line: 13, column: 33, scope: !37)
;; Debug-info metadata, continued: the remaining lexical-block-file scopes for
;; main's loop, followed by one distinct DISubprogram and its DILocations for
;; each of _Z5funcAi (!40), _Z5funcBi (!47), _Z8funcLeafi (!54) and _Z3fibi (!77).
;; Ids !41-!43, !48-!50 and !55-!56 are not defined in the visible file.
!37 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 6)
!38 = !DILocation(line: 13, column: 26, scope: !39)
!39 = !DILexicalBlockFile(scope: !30, file: !3, discriminator: 2)
;; _Z5funcAi: !45 is attached to the indirect call through @fp, !46 to the
;; direct call to _Z8funcLeafi and to the return.
!40 = distinct !DISubprogram(name: "funcA", linkageName: "_Z5funcAi", scope: !3, file: !3, line: 26, type: !7, scopeLine: 26, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!44 = !DILocation(line: 26, column: 22, scope: !40)
!45 = !DILocation(line: 28, column: 11, scope: !40)
!46 = !DILocation(line: 27, column: 3, scope: !40)
;; _Z5funcBi: !52 is attached to its call to _Z8funcLeafi.
!47 = distinct !DISubprogram(name: "funcB", linkageName: "_Z5funcBi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!51 = !DILocation(line: 33, column: 22, scope: !47)
!52 = !DILocation(line: 33, column: 11, scope: !47)
!53 = !DILocation(line: 35, column: 3, scope: !47)
;; _Z8funcLeafi: !67 is attached to its call to _Z5funcAi, !74 to its call to
;; _Z5funcBi.
!54 = distinct !DISubprogram(name: "funcLeaf", linkageName: "_Z8funcLeafi", scope: !3, file: !3, line: 48, type: !7, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!57 = !DILocation(line: 49, column: 9, scope: !58)
!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 49, column: 7)
!59 = !DILocation(line: 49, column: 7, scope: !54)
!60 = !DILocation(line: 58, column: 14, scope: !61)
!61 = !DILexicalBlockFile(scope: !62, file: !3, discriminator: 2)
!62 = distinct !DILexicalBlock(scope: !58, file: !3, line: 56, column: 8)
!63 = !DILocation(line: 58, column: 5, scope: !61)
!64 = !DILocation(line: 52, column: 16, scope: !65)
!65 = distinct !DILexicalBlock(scope: !66, file: !3, line: 51, column: 19)
!66 = distinct !DILexicalBlock(scope: !58, file: !3, line: 49, column: 14)
!67 = !DILocation(line: 52, column: 12, scope: !65)
!68 = !DILocation(line: 52, column: 9, scope: !65)
!69 = !DILocation(line: 51, column: 14, scope: !70)
!70 = !DILexicalBlockFile(scope: !66, file: !3, discriminator: 2)
!71 = !DILocation(line: 51, column: 5, scope: !70)
!72 = !DILocation(line: 59, column: 16, scope: !73)
!73 = distinct !DILexicalBlock(scope: !62, file: !3, line: 58, column: 19)
!74 = !DILocation(line: 59, column: 12, scope: !73)
!75 = !DILocation(line: 59, column: 9, scope: !73)
!76 = !DILocation(line: 63, column: 3, scope: !54)
;; _Z3fibi: every instruction in the function shares the single location !78.
;; NOTE(review): name is "funcB" while linkageName is "_Z3fibi", and
;; line/scopeLine repeat those of !47 -- this looks copied from !47. Confirm
;; it is intentional before changing it, since the line values may be what the
;; .prof inputs refer to.
!77 = distinct !DISubprogram(name: "funcB", linkageName: "_Z3fibi", scope: !3, file: !3, line: 32, type: !7, scopeLine: 32, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!78 = !DILocation(line: 33, column: 22, scope: !77)