;; Test callsite context graph generation for call graph with MIBs
;; that have pruned contexts that partially match multiple inlined
;; callsite contexts, requiring duplication of context ids and nodes
;; while matching callsite nodes onto the graph. Also tests graph and IR
;; cloning.
;;
;; Original code looks like:
;;
;; char *D() {
;;   return new char[10];
;; }
;;
;; char *F() {
;;   return D();
;; }
;;
;; char *C() {
;;   return D();
;; }
;;
;; char *B() {
;;   return C();
;; }
;;
;; char *E() {
;;   return C();
;; }
;; int main(int argc, char **argv) {
;;   char *x = B(); // cold
;;   char *y = E(); // cold
;;   char *z = F(); // default
;;   memset(x, 0, 10);
;;   memset(y, 0, 10);
;;   memset(z, 0, 10);
;;   delete[] z;
;;   sleep(10);
;;   delete[] x;
;;   delete[] y;
;;   return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The code below was created by forcing inlining of C into both B and E.
;; Since both allocation contexts via C are cold, the matched memprof
;; metadata has the context pruned above C's callsite. This requires
;; matching the stack node for C to callsites where it was inlined (i.e.
;; the callsites in B and E that have callsite metadata that includes C's).
;; It also requires duplication of that node in the graph as well as the
;; duplication of the context ids along that path through the graph,
;; so that we can represent the duplicated (via inlining) C callsite.
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -thinlto-bc %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN:  -r=%t.o,main,plx \
; RUN:  -r=%t.o,_ZdaPv, \
; RUN:  -r=%t.o,sleep, \
; RUN:  -r=%t.o,_Znam, \
; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN:  --check-prefix=STATS

; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
;; We should clone D once for the cold allocations via C.
; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED


;; Try again but with distributed ThinLTO
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN:  -thinlto-distributed-indexes \
; RUN:  -r=%t.o,main,plx \
; RUN:  -r=%t.o,_ZdaPv, \
; RUN:  -r=%t.o,sleep, \
; RUN:  -r=%t.o,_Znam, \
; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN:  --check-prefix=STATS

;; The distributed invocation above writes its dot files with the %t2. prefix,
;; so read those here rather than the files left behind by the first invocation.
; RUN:  cat %t2.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
; RUN:  cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
;; We should clone D once for the cold allocations via C.
; RUN:  cat %t2.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED

;; Check distributed index
; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB

source_filename = "duplicate-context-ids.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; D: the allocation site. The call to operator new[] carries the memprof
;; MIB list (!0, one cold and one notcold context) and its own stack id (!5).
define internal ptr @_Z1Dv() {
entry:
  %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
  ret ptr null
}

declare ptr @_Znam(i64)

;; F: calls D; callsite id !6 is the second frame of the notcold context (!4).
define internal ptr @_Z1Fv() {
entry:
  %call = call ptr @_Z1Dv(), !callsite !6
  ret ptr null
}

;; C: out-of-line copy calling D; callsite id !7 is the second frame of the
;; cold context (!2).
define internal ptr @_Z1Cv() {
entry:
  %call = call ptr @_Z1Dv(), !callsite !7
  ret ptr null
}

;; B: C has been inlined here, so the call to D carries a two-entry callsite
;; stack (!8) whose first id is C's callsite id.
define internal ptr @_Z1Bv() {
entry:
  %call.i = call ptr @_Z1Dv(), !callsite !8
  ret ptr null
}

;; E: same shape as B; callsite stack !9 also starts with C's callsite id.
define internal ptr @_Z1Ev() {
entry:
  %call.i = call ptr @_Z1Dv(), !callsite !9
  ret ptr null
}

;; main: calls B, E and F. These calls carry no callsite metadata.
define i32 @main() {
entry:
  call ptr @_Z1Bv()
  call ptr @_Z1Ev()
  call ptr @_Z1Fv()
  ret i32 0
}

declare void @_ZdaPv()

declare i32 @sleep()

;; !0 is the MIB list attached to the allocation in D.
!0 = !{!1, !3}
;; Cold context: D's allocation callsite id (same as !5), then C's callsite
;; id (same as !7 and the first entry of !8 and !9).
!1 = !{!2, !"cold"}
!2 = !{i64 6541423618768552252, i64 -6270142974039008131}
;; Notcold context: D's allocation callsite id, then F's callsite id (!6).
!3 = !{!4, !"notcold"}
!4 = !{i64 6541423618768552252, i64 -4903163940066524832}
;; Callsite stack ids for the calls in D, F, C, B and E respectively. The
;; second ids in !8 and !9 are presumably the calls to B and E in main; they
;; do not appear in any MIB in this file.
!5 = !{i64 6541423618768552252}
!6 = !{i64 -4903163940066524832}
!7 = !{i64 -6270142974039008131}
!8 = !{i64 -6270142974039008131, i64 -184525619819294889}
!9 = !{i64 -6270142974039008131, i64 1905834578520680781}


;; After adding only the alloc node memprof metadata, we only have 2 contexts.

; DUMP: CCG before updating call stack chains:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D:0x[a-z0-9]+]]
; DUMP:   Versions: 1 MIB:
; DUMP:     AllocType 2 StackIds: 0
; DUMP:     AllocType 1 StackIds: 1
; DUMP:   (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
; DUMP:     Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2

; DUMP: Node [[C]]
; DUMP:   null Call
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 1
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
; DUMP:   CallerEdges:

; DUMP: Node [[F]]
; DUMP:   null Call
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 2
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP:   CallerEdges:

;; After updating for callsite metadata, we should have generated context ids 3 and 4,
;; along with 2 new nodes for those callsites. All have the same allocation type
;; behavior as the original C node.

; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D]]
; DUMP:   Versions: 1 MIB:
; DUMP:     AllocType 2 StackIds: 0
; DUMP:     AllocType 1 StackIds: 1
; DUMP:   (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2 3 4
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP:     Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
; DUMP:     Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP:     Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1

; DUMP: Node [[F]]
; DUMP:   Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1 (clone 0)
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 2
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP:   CallerEdges:

; DUMP: Node [[C2]]
; DUMP:   Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0 (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 3
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
; DUMP:   CallerEdges:

; DUMP: Node [[B]]
; DUMP:   Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2 (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 4
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
; DUMP:   CallerEdges:

; DUMP: Node [[E]]
; DUMP:   Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3 (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 1
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
; DUMP:   CallerEdges:


; DUMP: CCG after cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D]]
; DUMP:   Versions: 1 MIB:
; DUMP:     AllocType 2 StackIds: 0
; DUMP:     AllocType 1 StackIds: 1
; DUMP:   (clone 0)
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 2
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP:   Clones: [[D2:0x[a-z0-9]+]]

; DUMP: Node [[F]]
; DUMP:   Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1 (clone 0)
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 2
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP:   CallerEdges:

; DUMP: Node [[C2]]
; DUMP:   Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0 (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 3
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
; DUMP:   CallerEdges:

; DUMP: Node [[B]]
; DUMP:   Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2 (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 4
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
; DUMP:   CallerEdges:

; DUMP: Node [[E]]
; DUMP:   Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3 (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 1
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
; DUMP:   CallerEdges:

; DUMP: Node [[D2]]
; DUMP:   Versions: 1 MIB:
; DUMP:     AllocType 2 StackIds: 0
; DUMP:     AllocType 1 StackIds: 1
; DUMP:   (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 1 3 4
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[D2]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
; DUMP:     Edge from Callee [[D2]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
; DUMP:     Edge from Callee [[D2]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP:   Clone of [[D]]


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis


; DOTPRE: digraph "prestackupdate" {
; DOTPRE:   label="prestackupdate";
; DOTPRE:   Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
; DOTPRE:   Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
; DOTPRE:   Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
; DOTPRE:   Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
; DOTPRE:   Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
; DOTPRE: }


; DOTPOST:digraph "postbuild" {
; DOTPOST:   label="postbuild";
; DOTPOST:   Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
; DOTPOST:   Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
; DOTPOST:   Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
; DOTPOST:   Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
; DOTPOST:   Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
; DOTPOST:   Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
; DOTPOST:   Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOTPOST:   Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
; DOTPOST:   Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
; DOTPOST:}


; DOTCLONED: digraph "cloned" {
; DOTCLONED:   label="cloned";
; DOTCLONED:   Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
; DOTCLONED:   Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
; DOTCLONED:   Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
; DOTCLONED:   Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
; DOTCLONED:   Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
; DOTCLONED:   Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
; DOTCLONED:   Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOTCLONED:   Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
; DOTCLONED:   Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
; DOTCLONED:   Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
; DOTCLONED: }

; DISTRIB: ^[[C:[0-9]+]] = gv: (guid: 1643923691937891493, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1)
; DISTRIB: ^[[D]] = gv: (guid: 4881081444663423788, {{.*}} allocs: ((versions: (notcold, cold)
; DISTRIB: ^[[B:[0-9]+]] = gv: (guid: 14590037969532473829, {{.*}} callsites: ((callee: ^[[D]], clones: (1)
; DISTRIB: ^[[F:[0-9]+]] = gv: (guid: 17035303613541779335, {{.*}} callsites: ((callee: ^[[D]], clones: (0)
; DISTRIB: ^[[E:[0-9]+]] = gv: (guid: 17820708772846654376, {{.*}} callsites: ((callee: ^[[D]], clones: (1)