; REQUIRES: x86_64-linux
; REQUIRES: asserts
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s

; Stale-profile matching test for pseudo-probe based sample profiles.
; The sample-profile pass is run with --salvage-stale-profile against a
; profile collected from an older revision of the program, and the debug
; output of the loader/matcher is inspected below.

; The profiled source code:

; volatile int x = 1;
; __attribute__((noinline)) int bar(int p) {
;   return p;
; }

; __attribute__((always_inline)) int foo(int i, int p) {
;   if (i % 10) return bar(p);
;   else return bar(p + 1);
; }

; int main() {
;   for (int i = 0; i < 1000 * 1000; i++) {
;     x += foo(i, x);
;     x += bar(x);
;     x += foo(i, x);
;     x += bar(x);
;   }
; }

; The source code for the current build:

; volatile int x = 1;
; __attribute__((noinline)) int bar(int p) {
;   return p;
; }

; __attribute__((always_inline)) int foo(int i, int p) {
;   if (i % 10) return bar(p);
;   else return bar(p + 1);
; }

; int main() {
;   if (x == 0) // code change
;     return 0; // code change
;   for (int i = 0; i < 1000 * 1000; i++) {
;     x += foo(i, x);
;     x += bar(x);
;     if (i < 0) // code change
;       return 0; // code change
;     x += foo(i, x);
;     x += bar(x);
;   }
; }

; Verify that stale profile matching is not run for a function whose
; checksum still matches the profile.
; CHECK-NOT: Run stale profile matching for bar

; main was edited after profiling (see the "code change" lines above), so the
; matcher is expected to run for it.
; CHECK: Run stale profile matching for main

; Expected location matches reported for main.
; CHECK: Location is matched from 1 to 1
; CHECK: Location is matched from 2 to 2
; CHECK: Location is matched from 3 to 3
; CHECK: Location is matched from 4 to 4
; CHECK: Location is matched from 5 to 5
; CHECK: Location is matched from 6 to 6
; CHECK: Location is matched from 7 to 7
; CHECK: Location is matched from 8 to 8
; CHECK: Location is matched from 9 to 9
; CHECK: Location is matched from 10 to 10
; CHECK: Location is matched from 11 to 11

; Expected call-site matches (anchored on the callee name) and the backward
; rematches reported after the first matched call site.
; CHECK: Callsite with callee:foo is matched from 13 to 6
; CHECK: Location is rematched backwards from 7 to 0
; CHECK: Location is rematched backwards from 8 to 1
; CHECK: Location is rematched backwards from 9 to 2
; CHECK: Location is rematched backwards from 10 to 3
; CHECK: Location is rematched backwards from 11 to 4
; CHECK: Callsite with callee:bar is matched from 14 to 7
; CHECK: Callsite with callee:foo is matched from 15 to 8
; CHECK: Callsite with callee:bar is matched from 16 to 9


; Expected per-instruction weights printed in the debug output. Probes carrying
; GUID -2624081020897602054 belong to main and those carrying
; 6699318081062747564 belong to foo (see !llvm.pseudo_probe_desc below).
; CHECK: 2: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 3: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 4: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00)
; CHECK: 5: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 101 - factor: 1.00)
; CHECK: 5: %call.i8 = call i32 @bar(i32 noundef %1), !dbg ![[#]] - weight: 101 - factor: 1.00)
; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00)
; CHECK: 6: %call1.i5 = call i32 @bar(i32 noundef %add.i4), !dbg ![[#]] - weight: 13 - factor: 1.00)
; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 14: %call2 = call i32 @bar(i32 noundef %3), !dbg ![[#]] - weight: 124 - factor: 1.00)
; CHECK: 8: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
; CHECK: 1: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 117 - factor: 1.00)
; CHECK: 2: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 104 - factor: 1.00)
; CHECK: 5: %call.i = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 104 - factor: 1.00)
; CHECK: 3: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00)
; CHECK: 6: %call1.i = call i32 @bar(i32 noundef %add.i), !dbg ![[#]] - weight: 14 - factor: 1.00)
; CHECK: 4: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 121 - factor: 1.00)
; CHECK: 16: %call9 = call i32 @bar(i32 noundef %7), !dbg ![[#]] - weight: 126 - factor: 1.00)
; CHECK: 9: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 10: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
; CHECK: 11: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg ![[#]] - weight: 116 - factor: 1.00)
; CHECK: 1: call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@x = dso_local global i32 1, align 4, !dbg !0

; bar: returns its argument unchanged; carries a single pseudo probe.
; Function Attrs: noinline nounwind uwtable
define dso_local i32 @bar(i32 noundef %p) #0 !dbg !16 {
entry:
  call void @llvm.dbg.value(metadata i32 %p, metadata !20, metadata !DIExpression()), !dbg !21
  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !22
  ret i32 %p, !dbg !23
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

; foo: calls bar(p) when i % 10 is non-zero, otherwise bar(p + 1). Its value
; names (%call, %add, %call1) are the base of the inlined names referenced by
; the weight expectations above, so they must not be renamed.
; Function Attrs: alwaysinline nounwind uwtable
define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #2 !dbg !24 {
entry:
  call void @llvm.dbg.value(metadata i32 %i, metadata !28, metadata !DIExpression()), !dbg !30
  call void @llvm.dbg.value(metadata i32 %p, metadata !29, metadata !DIExpression()), !dbg !30
  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg !31
  %rem = srem i32 %i, 10, !dbg !33
  %tobool = icmp ne i32 %rem, 0, !dbg !33
  br i1 %tobool, label %if.then, label %if.else, !dbg !34

if.then:                                          ; preds = %entry
  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !35
  %call = call i32 @bar(i32 noundef %p), !dbg !36
  br label %return, !dbg !38

if.else:                                          ; preds = %entry
  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg !39
  %add = add nsw i32 %p, 1, !dbg !40
  %call1 = call i32 @bar(i32 noundef %add), !dbg !41
  br label %return, !dbg !43

return:                                           ; preds = %if.else, %if.then
  %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !44
  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !45
  ret i32 %retval.0, !dbg !45
}

; main: IR for the current-build source listed at the top of the file, i.e.
; including the two early-return checks that the profiled revision lacked.
; Function Attrs: nounwind uwtable
define dso_local i32 @main() #3 !dbg !46 {
entry:
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !52
  %0 = load volatile i32, ptr @x, align 4, !dbg !52, !tbaa !54
  %cmp = icmp eq i32 %0, 0, !dbg !58
  br i1 %cmp, label %if.then, label %if.end, !dbg !59

if.then:                                          ; preds = %entry
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !60
  br label %for.end, !dbg !60

if.end:                                           ; preds = %entry
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !61
  call void @llvm.dbg.value(metadata i32 0, metadata !50, metadata !DIExpression()), !dbg !62
  br label %for.cond, !dbg !63

for.cond:                                         ; preds = %if.end6, %if.end
  %i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !64
  call void @llvm.dbg.value(metadata i32 %i.0, metadata !50, metadata !DIExpression()), !dbg !62
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !65
  %cmp1 = icmp slt i32 %i.0, 1000000, !dbg !67
  br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !68

for.cond.cleanup:                                 ; preds = %for.cond
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg !68
  br label %cleanup, !dbg !68

for.body:                                         ; preds = %for.cond
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !69
  %1 = load volatile i32, ptr @x, align 4, !dbg !71, !tbaa !54
  %call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !72
  %2 = load volatile i32, ptr @x, align 4, !dbg !74, !tbaa !54
  %add = add nsw i32 %2, %call, !dbg !74
  store volatile i32 %add, ptr @x, align 4, !dbg !74, !tbaa !54
  %3 = load volatile i32, ptr @x, align 4, !dbg !75, !tbaa !54
  %call2 = call i32 @bar(i32 noundef %3), !dbg !76
  %4 = load volatile i32, ptr @x, align 4, !dbg !78, !tbaa !54
  %add3 = add nsw i32 %4, %call2, !dbg !78
  store volatile i32 %add3, ptr @x, align 4, !dbg !78, !tbaa !54
  ; The branch for the `i < 0` check (source line 17) appears here with a
  ; constant-false condition.
  br i1 false, label %if.then5, label %if.end6, !dbg !79

if.then5:                                         ; preds = %for.body
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !80
  br label %cleanup, !dbg !80

if.end6:                                          ; preds = %for.body
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !82
  %5 = load volatile i32, ptr @x, align 4, !dbg !83, !tbaa !54
  %call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !84
  %6 = load volatile i32, ptr @x, align 4, !dbg !86, !tbaa !54
  %add8 = add nsw i32 %6, %call7, !dbg !86
  store volatile i32 %add8, ptr @x, align 4, !dbg !86, !tbaa !54
  %7 = load volatile i32, ptr @x, align 4, !dbg !87, !tbaa !54
  %call9 = call i32 @bar(i32 noundef %7), !dbg !88
  %8 = load volatile i32, ptr @x, align 4, !dbg !90, !tbaa !54
  %add10 = add nsw i32 %8, %call9, !dbg !90
  store volatile i32 %add10, ptr @x, align 4, !dbg !90, !tbaa !54
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !91
  %inc = add nsw i32 %i.0, 1, !dbg !91
  call void @llvm.dbg.value(metadata i32 %inc, metadata !50, metadata !DIExpression()), !dbg !62
  br label %for.cond, !dbg !92, !llvm.loop !93

cleanup:                                          ; preds = %if.then5, %for.cond.cleanup
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg !96
  br label %for.end

for.end:                                          ; preds = %cleanup, %if.then
  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg !97
  ret i32 0, !dbg !97
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #4

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #4

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) #1

; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #5

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.value(metadata, metadata, metadata) #6

; All three defined functions carry "use-sample-profile".
attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
attributes #2 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #3 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #5 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
attributes #6 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!7, !8, !9, !10, !11}
!llvm.ident = !{!12}
!llvm.pseudo_probe_desc = !{!13, !14, !15}

!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
!3 = !DIFile(filename: "test.c", directory: "path")
!4 = !{!0}
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!7 = !{i32 7, !"Dwarf Version", i32 5}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"wchar_size", i32 4}
!10 = !{i32 7, !"uwtable", i32 2}
!11 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
!12 = !{!"clang version 17.0.0"}
!13 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
!14 = !{i64 6699318081062747564, i64 563022570642068, !"foo"}
!15 = !{i64 -2624081020897602054, i64 1126158552146340, !"main"}
!16 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 2, type: !17, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19)
!17 = !DISubroutineType(types: !18)
!18 = !{!6, !6}
!19 = !{!20}
!20 = !DILocalVariable(name: "p", arg: 1, scope: !16, file: !3, line: 2, type: !6)
!21 = !DILocation(line: 0, scope: !16)
!22 = !DILocation(line: 3, column: 10, scope: !16)
!23 = !DILocation(line: 3, column: 3, scope: !16)
!24 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 6, type: !25, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !27)
!25 = !DISubroutineType(types: !26)
!26 = !{!6, !6, !6}
!27 = !{!28, !29}
!28 = !DILocalVariable(name: "i", arg: 1, scope: !24, file: !3, line: 6, type: !6)
!29 = !DILocalVariable(name: "p", arg: 2, scope: !24, file: !3, line: 6, type: !6)
!30 = !DILocation(line: 0, scope: !24)
!31 = !DILocation(line: 7, column: 6, scope: !32)
!32 = distinct !DILexicalBlock(scope: !24, file: !3, line: 7, column: 6)
!33 = !DILocation(line: 7, column: 8, scope: !32)
!34 = !DILocation(line: 7, column: 6, scope: !24)
!35 = !DILocation(line: 7, column: 26, scope: !32)
!36 = !DILocation(line: 7, column: 22, scope: !37)
!37 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646575)
!38 = !DILocation(line: 7, column: 14, scope: !32)
!39 = !DILocation(line: 8, column: 19, scope: !32)
!40 = !DILocation(line: 8, column: 21, scope: !32)
!41 = !DILocation(line: 8, column: 15, scope: !42)
!42 = !DILexicalBlockFile(scope: !32, file: !3, discriminator: 186646583)
!43 = !DILocation(line: 8, column: 8, scope: !32)
!44 = !DILocation(line: 0, scope: !32)
!45 = !DILocation(line: 9, column: 1, scope: !24)
!46 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !47, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !49)
!47 = !DISubroutineType(types: !48)
!48 = !{!6}
!49 = !{!50}
!50 = !DILocalVariable(name: "i", scope: !51, file: !3, line: 14, type: !6)
!51 = distinct !DILexicalBlock(scope: !46, file: !3, line: 14, column: 3)
!52 = !DILocation(line: 12, column: 6, scope: !53)
!53 = distinct !DILexicalBlock(scope: !46, file: !3, line: 12, column: 6)
!54 = !{!55, !55, i64 0}
!55 = !{!"int", !56, i64 0}
!56 = !{!"omnipotent char", !57, i64 0}
!57 = !{!"Simple C/C++ TBAA"}
!58 = !DILocation(line: 12, column: 8, scope: !53)
!59 = !DILocation(line: 12, column: 6, scope: !46)
!60 = !DILocation(line: 13, column: 5, scope: !53)
!61 = !DILocation(line: 14, column: 11, scope: !51)
!62 = !DILocation(line: 0, scope: !51)
!63 = !DILocation(line: 14, column: 7, scope: !51)
!64 = !DILocation(line: 14, scope: !51)
!65 = !DILocation(line: 14, column: 18, scope: !66)
!66 = distinct !DILexicalBlock(scope: !51, file: !3, line: 14, column: 3)
!67 = !DILocation(line: 14, column: 20, scope: !66)
!68 = !DILocation(line: 14, column: 3, scope: !51)
!69 = !DILocation(line: 15, column: 15, scope: !70)
!70 = distinct !DILexicalBlock(scope: !66, file: !3, line: 14, column: 40)
!71 = !DILocation(line: 15, column: 18, scope: !70)
!72 = !DILocation(line: 15, column: 11, scope: !73)
!73 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646639)
!74 = !DILocation(line: 15, column: 8, scope: !70)
!75 = !DILocation(line: 16, column: 15, scope: !70)
!76 = !DILocation(line: 16, column: 11, scope: !77)
!77 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646647)
!78 = !DILocation(line: 16, column: 8, scope: !70)
!79 = !DILocation(line: 17, column: 9, scope: !70)
!80 = !DILocation(line: 18, column: 8, scope: !81)
!81 = distinct !DILexicalBlock(scope: !70, file: !3, line: 17, column: 9)
!82 = !DILocation(line: 19, column: 15, scope: !70)
!83 = !DILocation(line: 19, column: 18, scope: !70)
!84 = !DILocation(line: 19, column: 11, scope: !85)
!85 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646655)
!86 = !DILocation(line: 19, column: 8, scope: !70)
!87 = !DILocation(line: 20, column: 15, scope: !70)
!88 = !DILocation(line: 20, column: 11, scope: !89)
!89 = !DILexicalBlockFile(scope: !70, file: !3, discriminator: 186646663)
!90 = !DILocation(line: 20, column: 8, scope: !70)
!91 = !DILocation(line: 14, column: 36, scope: !66)
!92 = !DILocation(line: 14, column: 3, scope: !66)
!93 = distinct !{!93, !68, !94, !95}
!94 = !DILocation(line: 21, column: 3, scope: !51)
!95 = !{!"llvm.loop.mustprogress"}
!96 = !DILocation(line: 0, scope: !46)
!97 = !DILocation(line: 22, column: 1, scope: !46)