;; Test callsite context graph generation for simple call graph with
;; two memprof contexts and no inlining.
;;
;; Original code looks like:
;;
;; char *bar() {
;;   return new char[10];
;; }
;;
;; char *baz() {
;;   return bar();
;; }
;;
;; char *foo() {
;;   return baz();
;; }
;;
;; int main(int argc, char **argv) {
;;   char *x = foo();
;;   char *y = foo();
;;   memset(x, 0, 10);
;;   memset(y, 0, 10);
;;   delete[] x;
;;   sleep(10);
;;   delete[] y;
;;   return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN:   -supports-hot-cold-new \
; RUN:   -r=%t.o,main,plx \
; RUN:   -r=%t.o,_ZdaPv, \
; RUN:   -r=%t.o,sleep, \
; RUN:   -r=%t.o,_Znam, \
; RUN:   -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:   -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN:   -memprof-report-hinted-sizes \
; RUN:   -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN:   -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=DUMP-SIZES \
; RUN:   --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS \
; RUN:   --check-prefix=SIZES

; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR


;; Try again but with distributed ThinLTO
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN:   -supports-hot-cold-new \
; RUN:   -thinlto-distributed-indexes \
; RUN:   -r=%t.o,main,plx \
; RUN:   -r=%t.o,_ZdaPv, \
; RUN:   -r=%t.o,sleep, \
; RUN:   -r=%t.o,_Znam, \
; RUN:   -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:   -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
; RUN:   -memprof-report-hinted-sizes \
; RUN:   -stats -pass-remarks=memprof-context-disambiguation \
; RUN:   -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN:   --check-prefix=STATS --check-prefix=SIZES

; RUN: cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
; RUN: cat %t2.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED

;; Check distributed index
; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB

;; Run ThinLTO backend
; RUN: opt -passes=memprof-context-disambiguation \
; RUN:   -memprof-import-summary=%t.o.thinlto.bc \
; RUN:   -stats -pass-remarks=memprof-context-disambiguation \
; RUN:   %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \
; RUN:   --check-prefix=STATS-BE --check-prefix=REMARKS

source_filename = "memprof-basic.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; Two calls to foo, each tagged with its own callsite stack id. The id on the
;; first call (!0) is the last frame of the "notcold" context and the id on the
;; second call (!1) is the last frame of the "cold" context in the memprof
;; metadata attached to the allocation in @_Z3barv.
define i32 @main() #0 {
entry:
  %call = call ptr @_Z3foov(), !callsite !0
  %call1 = call ptr @_Z3foov(), !callsite !1
  ret i32 0
}

declare void @_ZdaPv()

declare i32 @sleep()

;; The only allocation site in the test. The call to _Znam carries the memprof
;; metadata (!2) holding both profiled contexts, plus the callsite id (!7) that
;; is the first frame of each of those contexts.
define internal ptr @_Z3barv() #0 {
entry:
  %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
  ret ptr null
}

declare ptr @_Znam(i64)

;; Middle frame of the profiled call chain: calls bar. Callsite id !8 is the
;; second frame of both memprof contexts (!4 and !6).
define internal ptr @_Z3bazv() #0 {
entry:
  %call = call ptr @_Z3barv(), !callsite !8
  ret ptr null
}

;; Calls baz. Callsite id !9 is the third frame of both memprof contexts; the
;; contexts only diverge at the caller of this function.
define internal ptr @_Z3foov() #0 {
entry:
  %call = call ptr @_Z3bazv(), !callsite !9
  ret ptr null
}

; uselistorder directives
uselistorder ptr @_Z3foov, { 1, 0 }

attributes #0 = { noinline optnone }

;; !0 and !1 are the callsite ids used on the two calls to foo.
!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
;; !2 is the memprof metadata used on the allocation call: one "notcold" entry
;; (!3) and one "cold" entry (!5). !4 and !6 are their stack id lists; they
;; share the first three ids and differ only in the last one (!0 vs !1).
!2 = !{!3, !5}
!3 = !{!4, !"notcold", !10}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
!5 = !{!6, !"cold", !11, !12}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
;; Callsite ids for the calls in bar (!7), baz (!8) and foo (!9).
!7 = !{i64 9086428284934609951}
!8 = !{i64 -5964873800580613432}
!9 = !{i64 2732490490862098848}
;; Number pairs attached to the entries above; the hinted-size checks below
;; expect them to be reported as (full allocation context, total size).
!10 = !{i64 123, i64 100}
!11 = !{i64 456, i64 200}
!12 = !{i64 789, i64 300}


; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[BAR:0x[a-z0-9]+]]
; DUMP: Versions: 1 MIB:
; DUMP: AllocType 1 StackIds: 2, 3, 0
; DUMP: AllocType 2 StackIds: 2, 3, 1
; DUMP-SIZES: ContextSizeInfo per MIB:
; DUMP-SIZES: { 123, 100 }
; DUMP-SIZES: { 456, 200 }, { 789, 300 }
; DUMP: (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2

; DUMP: Node [[BAZ]]
; DUMP: Callee: 11481133863268513686 (_Z3barv) Clones: 0 StackIds: 2 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2

; DUMP: Node [[FOO]]
; DUMP: Callee: 1807954217441101578 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2

; DUMP: Node [[MAIN1]]
; DUMP: Callee: 8107868197919466657 (_Z3foov) Clones: 0 StackIds: 0 (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:

; DUMP: Node [[MAIN2]]
; DUMP: Callee: 8107868197919466657 (_Z3foov) Clones: 0 StackIds: 1 (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP: CallerEdges:

; DUMP: CCG after cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[BAR]]
; DUMP: Versions: 1 MIB:
; DUMP: AllocType 1 StackIds: 2, 3, 0
; DUMP: AllocType 2 StackIds: 2, 3, 1
; DUMP: (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
; DUMP: Clones: [[BAR2:0x[a-z0-9]+]]

; DUMP: Node [[BAZ]]
; DUMP: Callee: 11481133863268513686 (_Z3barv) Clones: 0 StackIds: 2 (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
; DUMP: Clones: [[BAZ2:0x[a-z0-9]+]]

; DUMP: Node [[FOO]]
; DUMP: Callee: 1807954217441101578 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP: Clones: [[FOO2:0x[a-z0-9]+]]

; DUMP: Node [[MAIN1]]
; DUMP: Callee: 8107868197919466657 (_Z3foov) Clones: 0 StackIds: 0 (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:

; DUMP: Node [[MAIN2]]
; DUMP: Callee: 8107868197919466657 (_Z3foov) Clones: 0 StackIds: 1 (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP: CallerEdges:

; DUMP: Node [[FOO2]]
; DUMP: Callee: 1807954217441101578 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP: Clone of [[FOO]]

; DUMP: Node [[BAZ2]]
; DUMP: Callee: 11481133863268513686 (_Z3barv) Clones: 0 StackIds: 2 (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAZ2]] to Caller: [[FOO2]] AllocTypes: Cold ContextIds: 2
; DUMP: Clone of [[BAZ]]

; DUMP: Node [[BAR2]]
; DUMP: Versions: 1 MIB:
; DUMP: AllocType 1 StackIds: 2, 3, 0
; DUMP: AllocType 2 StackIds: 2, 3, 1
; DUMP: (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAR2]] to Caller: [[BAZ2]] AllocTypes: Cold ContextIds: 2
; DUMP: Clone of [[BAR]]

; SIZES: NotCold full allocation context 123 with total size 100 is NotCold after cloning
; SIZES: Cold full allocation context 456 with total size 200 is Cold after cloning
; SIZES: Cold full allocation context 789 with total size 300 is Cold after cloning

; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
; REMARKS: created clone _Z3barv.memprof.1
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
; REMARKS: created clone _Z3bazv.memprof.1
; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
; REMARKS: created clone _Z3foov.memprof.1
; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1


; IR: define {{.*}} @main
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3bazv()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3bazv.memprof.1()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend


; DOT: digraph "postbuild" {
; DOT: label="postbuild";
; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOT: }


; DOTCLONED: digraph "cloned" {
; DOTCLONED: label="cloned";
; DOTCLONED: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
; DOTCLONED: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
; DOTCLONED: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOTCLONED: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
; DOTCLONED: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOTCLONED: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
; DOTCLONED: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOTCLONED: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
; DOTCLONED: Node[[MAIN2]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOTCLONED: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3bazv}"];
; DOTCLONED: Node[[FOO2]] -> Node[[BAZ2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOTCLONED: Node[[BAZ2]] [shape=record,tooltip="N[[BAZ2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3bazv -\> _Z3barv}"];
; DOTCLONED: Node[[BAZ2]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOTCLONED: Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
; DOTCLONED: }


; DISTRIB: ^[[BAZ:[0-9]+]] = gv: (guid: 1807954217441101578, {{.*}} callsites: ((callee: ^[[BAR:[0-9]+]], clones: (0, 1)
; DISTRIB: ^[[FOO:[0-9]+]] = gv: (guid: 8107868197919466657, {{.*}} callsites: ((callee: ^[[BAZ]], clones: (0, 1)
; DISTRIB: ^[[BAR]] = gv: (guid: 11481133863268513686, {{.*}} allocs: ((versions: (notcold, cold)
; DISTRIB: ^[[MAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[FOO]], clones: (0), {{.*}} (callee: ^[[FOO]], clones: (1)