; Regression test for stale-profile matching with a sample profile that is
; keyed by line offset (see the Inputs/non-probe-stale-profile-matching.prof
; file named on the opt command line; its contents are not part of this file).
;
; The profile is presumably collected from the first source listing below,
; while the IR in this file corresponds to the second listing, where extra
; statements were inserted into main(). The test inspects the debug output of
; the matcher, which is why an assertions-enabled build is required.
;
; REQUIRES: x86_64-linux
; REQUIRES: asserts
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/non-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -profile-isfs 2>&1 | FileCheck %s

; The profiled source code:

; volatile int x = 1;
; __attribute__((noinline)) int bar(int p) {
;   return p;
; }

; __attribute__((always_inline)) int foo(int i, int p) {
;   if (i % 10) return bar(p);
;   else return bar(p + 1);
; }

; int main() {
;   for (int i = 0; i < 1000 * 1000; i++) {
;     x += foo(i, x);
;     x += bar(x);
;     x += foo(i, x);
;     x += bar(x);
;   }
; }

; The source code for the current build:

; volatile int x = 1;
; __attribute__((noinline)) int bar(int p) {
;   return p;
; }

; __attribute__((always_inline)) int foo(int i, int p) {
;   if (i % 10) return bar(p);
;   else return bar(p + 1);
; }

; int main() {
;   if (x == 0)     // code change
;     return 0;     // code change
;   for (int i = 0; i < 1000 * 1000; i++) {
;     x += foo(i, x);
;     x += bar(x);
;     if (i < 0)    // code change
;       return 0;   // code change
;     x += foo(i, x);
;     x += bar(x);
;   }
; }

; Locations in the expectations below are line offsets from the first line of
; the enclosing function, optionally followed by ".<discriminator>".
;
; In main() the four calls sit at offsets 4, 5, 8 and 9 in the current build
; (debug lines 15, 16, 19 and 20 with main starting at line 11) and at offsets
; 2, 3, 4 and 5 in the profiled listing, so each call site is expected to be
; mapped back to its old offset.

; CHECK: Run stale profile matching for main
; CHECK: Callsite with callee:foo is matched from 4 to 2
; CHECK: Callsite with callee:bar is matched from 5 to 3
; CHECK: Callsite with callee:foo is matched from 8 to 4
; CHECK: Callsite with callee:bar is matched from 9 to 5

; foo() is the same in both listings, so its call sites map to themselves.

; CHECK: Run stale profile matching for foo
; CHECK: Callsite with callee:bar is matched from 1.15 to 1.15
; CHECK: Callsite with callee:bar is matched from 2 to 2

; bar() contains no calls; only the banner line is expected for it.

; CHECK: Run stale profile matching for bar

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@x = dso_local global i32 1, align 4

; bar(): returns its argument unchanged (source line 3).
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @bar(i32 noundef %p) #0 !dbg !9 {
entry:
  ret i32 %p, !dbg !13
}

; foo(): calls bar(p) when i % 10 != 0, otherwise bar(p + 1).
; The first call carries discriminator 15 on source line 7 (offset 1 from
; foo's start line 6); the second call is on source line 8 (offset 2).
; Function Attrs: alwaysinline nounwind uwtable
define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #1 !dbg !14 {
entry:
  %rem = srem i32 %i, 10, !dbg !15
  %tobool = icmp ne i32 %rem, 0, !dbg !15
  br i1 %tobool, label %if.then, label %if.else, !dbg !16

if.then:                                          ; preds = %entry
  %call = call i32 @bar(i32 noundef %p), !dbg !17
  br label %return, !dbg !19

if.else:                                          ; preds = %entry
  %add = add nsw i32 %p, 1, !dbg !20
  %call1 = call i32 @bar(i32 noundef %add), !dbg !21
  br label %return, !dbg !22

return:                                           ; preds = %if.else, %if.then
  %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !23
  ret i32 %retval.0, !dbg !24
}

; main() of the current build (start line 11). The calls to foo/bar are on
; source lines 15, 16, 19 and 20.
; Function Attrs: nounwind uwtable
define dso_local i32 @main() #2 !dbg !25 {
entry:
  ; Early exit added by the code change: if (x == 0) return 0;
  %0 = load volatile i32, ptr @x, align 4, !dbg !26, !tbaa !27
  %cmp = icmp eq i32 %0, 0, !dbg !31
  br i1 %cmp, label %if.then, label %if.end, !dbg !26

if.then:                                          ; preds = %entry
  br label %for.end, !dbg !32

if.end:                                           ; preds = %entry
  br label %for.cond, !dbg !33

for.cond:                                         ; preds = %if.end6, %if.end
  %i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !34
  %cmp1 = icmp slt i32 %i.0, 1000000, !dbg !35
  br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !37

for.cond.cleanup:                                 ; preds = %for.cond
  br label %cleanup, !dbg !38

for.body:                                         ; preds = %for.cond
  %1 = load volatile i32, ptr @x, align 4, !dbg !40, !tbaa !27
  %call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !41
  %2 = load volatile i32, ptr @x, align 4, !dbg !42, !tbaa !27
  %add = add nsw i32 %2, %call, !dbg !42
  store volatile i32 %add, ptr @x, align 4, !dbg !42, !tbaa !27
  %3 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !27
  %call2 = call i32 @bar(i32 noundef %3), !dbg !44
  %4 = load volatile i32, ptr @x, align 4, !dbg !45, !tbaa !27
  %add3 = add nsw i32 %4, %call2, !dbg !45
  store volatile i32 %add3, ptr @x, align 4, !dbg !45, !tbaa !27
  ; Branch located on source line 17; the condition is the constant false here
  ; (presumably the already-folded `i < 0` check from the code change).
  br i1 false, label %if.then5, label %if.end6, !dbg !46

if.then5:                                         ; preds = %for.body
  br label %cleanup, !dbg !47

if.end6:                                          ; preds = %for.body
  %5 = load volatile i32, ptr @x, align 4, !dbg !48, !tbaa !27
  %call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !49
  %6 = load volatile i32, ptr @x, align 4, !dbg !50, !tbaa !27
  %add8 = add nsw i32 %6, %call7, !dbg !50
  store volatile i32 %add8, ptr @x, align 4, !dbg !50, !tbaa !27
  %7 = load volatile i32, ptr @x, align 4, !dbg !51, !tbaa !27
  %call9 = call i32 @bar(i32 noundef %7), !dbg !52
  %8 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !27
  %add10 = add nsw i32 %8, %call9, !dbg !53
  store volatile i32 %add10, ptr @x, align 4, !dbg !53, !tbaa !27
  %inc = add nsw i32 %i.0, 1, !dbg !54
  br label %for.cond, !dbg !56, !llvm.loop !57

cleanup:                                          ; preds = %if.then5, %for.cond.cleanup
  br label %for.end

for.end:                                          ; preds = %cleanup, %if.then
  ret i32 0, !dbg !61
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3

; Every defined function carries "use-sample-profile".
attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #1 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
!llvm.ident = !{!8}

; Debug info: line-tables-only, with debugInfoForProfiling enabled.
!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0git", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!1 = !DIFile(filename: "test.c", directory: "path")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 8, !"PIC Level", i32 2}
!6 = !{i32 7, !"PIE Level", i32 2}
!7 = !{i32 7, !"uwtable", i32 2}
!8 = !{!"clang version 19.0.0git"}
; bar: starts at source line 2.
!9 = distinct !DISubprogram(name: "bar", scope: !10, file: !10, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!10 = !DIFile(filename: "test.c", directory: "path")
!11 = !DISubroutineType(types: !12)
!12 = !{}
!13 = !DILocation(line: 3, column: 3, scope: !9)
; foo: starts at source line 6.
!14 = distinct !DISubprogram(name: "foo", scope: !10, file: !10, line: 6, type: !11, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!15 = !DILocation(line: 7, column: 9, scope: !14)
!16 = !DILocation(line: 7, column: 7, scope: !14)
!17 = !DILocation(line: 7, column: 23, scope: !18)
!18 = !DILexicalBlockFile(scope: !14, file: !10, discriminator: 15)
!19 = !DILocation(line: 7, column: 15, scope: !18)
!20 = !DILocation(line: 8, column: 21, scope: !14)
!21 = !DILocation(line: 8, column: 15, scope: !14)
!22 = !DILocation(line: 8, column: 8, scope: !14)
!23 = !DILocation(line: 0, scope: !14)
!24 = !DILocation(line: 9, column: 1, scope: !14)
; main: starts at source line 11.
!25 = distinct !DISubprogram(name: "main", scope: !10, file: !10, line: 11, type: !11, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
!26 = !DILocation(line: 12, column: 7, scope: !25)
!27 = !{!28, !28, i64 0}
!28 = !{!"int", !29, i64 0}
!29 = !{!"omnipotent char", !30, i64 0}
!30 = !{!"Simple C/C++ TBAA"}
!31 = !DILocation(line: 12, column: 9, scope: !25)
!32 = !DILocation(line: 13, column: 5, scope: !25)
!33 = !DILocation(line: 14, column: 8, scope: !25)
!34 = !DILocation(line: 14, scope: !25)
!35 = !DILocation(line: 14, column: 21, scope: !36)
!36 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 15)
!37 = !DILocation(line: 14, column: 3, scope: !36)
!38 = !DILocation(line: 14, column: 3, scope: !39)
!39 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 4)
!40 = !DILocation(line: 15, column: 18, scope: !25)
!41 = !DILocation(line: 15, column: 11, scope: !25)
!42 = !DILocation(line: 15, column: 8, scope: !25)
!43 = !DILocation(line: 16, column: 15, scope: !25)
!44 = !DILocation(line: 16, column: 11, scope: !25)
!45 = !DILocation(line: 16, column: 8, scope: !25)
!46 = !DILocation(line: 17, column: 10, scope: !25)
!47 = !DILocation(line: 18, column: 8, scope: !25)
!48 = !DILocation(line: 19, column: 18, scope: !25)
!49 = !DILocation(line: 19, column: 11, scope: !25)
!50 = !DILocation(line: 19, column: 8, scope: !25)
!51 = !DILocation(line: 20, column: 15, scope: !25)
!52 = !DILocation(line: 20, column: 11, scope: !25)
!53 = !DILocation(line: 20, column: 8, scope: !25)
!54 = !DILocation(line: 14, column: 37, scope: !55)
!55 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 6)
!56 = !DILocation(line: 14, column: 3, scope: !55)
!57 = distinct !{!57, !58, !59, !60}
!58 = !DILocation(line: 14, column: 3, scope: !25)
!59 = !DILocation(line: 21, column: 3, scope: !25)
!60 = !{!"llvm.loop.mustprogress"}
!61 = !DILocation(line: 22, column: 1, scope: !25)