;; Test callsite context graph generation for call graph with MIBs
;; that have pruned contexts that partially match multiple inlined
;; callsite contexts, requiring duplication of context ids and nodes
;; while matching callsite nodes onto the graph. Also tests graph and IR
;; cloning.
;;
;; Original code looks like:
;;
;; char *D() {
;;   return new char[10];
;; }
;;
;; char *F() {
;;   return D();
;; }
;;
;; char *C() {
;;   return D();
;; }
;;
;; char *B() {
;;   return C();
;; }
;;
;; char *E() {
;;   return C();
;; }
;; int main(int argc, char **argv) {
;;   char *x = B(); // cold
;;   char *y = E(); // cold
;;   char *z = F(); // default
;;   memset(x, 0, 10);
;;   memset(y, 0, 10);
;;   memset(z, 0, 10);
;;   delete[] z;
;;   sleep(10);
;;   delete[] x;
;;   delete[] y;
;;   return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The code below was created by forcing inlining of C into both B and E.
;; Since both allocation contexts via C are cold, the matched memprof
;; metadata has the context pruned above C's callsite. This requires
;; matching the stack node for C to callsites where it was inlined (i.e.
;; the callsites in B and E that have callsite metadata that includes C's).
;; It also requires duplication of that node in the graph as well as the
;; duplication of the context ids along that path through the graph,
;; so that we can represent the duplicated (via inlining) C callsite.
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.

; RUN: opt -passes=memprof-context-disambiguation \
; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
; RUN:  --check-prefix=STATS --check-prefix=REMARKS

; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
;; We should clone D once for the cold allocations via C.
; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define internal ptr @_Z1Dv() {
entry:
  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5
  ret ptr null
}

declare ptr @_Znam(i64)

define internal ptr @_Z1Fv() #0 {
entry:
  %call = call noundef ptr @_Z1Dv(), !callsite !6
  ret ptr null
}

; Function Attrs: mustprogress noinline optnone uwtable
define internal ptr @_Z1Cv() #1 {
entry:
  %call = call noundef ptr @_Z1Dv(), !callsite !7
  ret ptr null
}

; Function Attrs: mustprogress noinline optnone uwtable
define internal ptr @_Z1Bv() #1 {
entry:
  %call.i = call noundef ptr @_Z1Dv(), !callsite !8
  ret ptr null
}

; Function Attrs: mustprogress noinline optnone uwtable
define internal ptr @_Z1Ev() #1 {
entry:
  %call.i = call noundef ptr @_Z1Dv(), !callsite !9
  ret ptr null
}

; Function Attrs: noinline
declare i32 @main() #2

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3

; Function Attrs: nounwind
declare void @_ZdaPv() #4

declare i32 @sleep() #5

attributes #0 = { "disable-tail-calls"="true" }
attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { noinline }
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #4 = { nounwind }
attributes #5 = { "no-trapping-math"="true" }
attributes #6 = { builtin }

;; Memprof metadata on the allocation in D has two MIBs, one cold (pruned at
;; C's callsite id) and one notcold (via F's callsite id). The callsite
;; metadata on the calls in B and E lists C's id first, followed by their own.
!0 = !{!1, !3}
!1 = !{!2, !"cold"}
!2 = !{i64 6541423618768552252, i64 -6270142974039008131}
!3 = !{!4, !"notcold"}
!4 = !{i64 6541423618768552252, i64 -4903163940066524832}
!5 = !{i64 6541423618768552252}
!6 = !{i64 -4903163940066524832}
!7 = !{i64 -6270142974039008131}
!8 = !{i64 -6270142974039008131, i64 -184525619819294889}
!9 = !{i64 -6270142974039008131, i64 1905834578520680781}


;; After adding only the alloc node memprof metadata, we only have 2 contexts.

; DUMP: CCG before updating call stack chains:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D:0x[a-z0-9]+]]
; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 1 2
; DUMP: 	CalleeEdges:
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
; DUMP: 		Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2

; DUMP: Node [[C]]
; DUMP: 	null Call
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 1
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
; DUMP: 	CallerEdges:

; DUMP: Node [[F]]
; DUMP: 	null Call
; DUMP: 	AllocTypes: NotCold
; DUMP: 	ContextIds: 2
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP: 	CallerEdges:

;; After updating for callsite metadata, we should have generated context ids 3 and 4,
;; along with 2 new nodes for those callsites. All have the same allocation type
;; behavior as the original C node.

; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D]]
; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 1 2 3 4
; DUMP: 	CalleeEdges:
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP: 		Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
; DUMP: 		Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP: 		Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1

; DUMP: Node [[F]]
; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
; DUMP: 	AllocTypes: NotCold
; DUMP: 	ContextIds: 2
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP: 	CallerEdges:

; DUMP: Node [[C2]]
; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 3
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
; DUMP: 	CallerEdges:

; DUMP: Node [[B]]
; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 4
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
; DUMP: 	CallerEdges:

; DUMP: Node [[E]]
; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 1
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
; DUMP: 	CallerEdges:

; DUMP: CCG after cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D]]
; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
; DUMP: 	AllocTypes: NotCold
; DUMP: 	ContextIds: 2
; DUMP: 	CalleeEdges:
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP: 	Clones: [[D2:0x[a-z0-9]+]]

; DUMP: Node [[F]]
; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
; DUMP: 	AllocTypes: NotCold
; DUMP: 	ContextIds: 2
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP: 	CallerEdges:

; DUMP: Node [[C2]]
; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 3
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
; DUMP: 	CallerEdges:

; DUMP: Node [[B]]
; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 4
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
; DUMP: 	CallerEdges:

; DUMP: Node [[E]]
; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 1
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
; DUMP: 	CallerEdges:

;; The caller edges of the cloned D node must point at the very same E, C2 and
;; B nodes matched above, so reuse the captured addresses rather than
;; redefining the variables (which would accept any address).
; DUMP: Node [[D2]]
; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 1 3 4
; DUMP: 	CalleeEdges:
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
; DUMP: 	Clone of [[D]]

; REMARKS: created clone _Z1Dv.memprof.1
; REMARKS: call in clone _Z1Ev assigned to call function clone _Z1Dv.memprof.1
; REMARKS: call in clone _Z1Cv assigned to call function clone _Z1Dv.memprof.1
; REMARKS: call in clone _Z1Bv assigned to call function clone _Z1Dv.memprof.1
; REMARKS: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1Fv assigned to call function clone _Z1Dv
; REMARKS: call in clone _Z1Dv marked with memprof allocation attribute notcold


;; The allocation via F does not allocate cold memory. It should call the
;; original D, which ultimately calls the original allocation decorated
;; with a "notcold" attribute.
; IR: define internal {{.*}} @_Z1Dv()
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z1Fv()
; IR:   call {{.*}} @_Z1Dv()
;; The allocations via B, C and E allocate cold memory. They should call the
;; cloned D, which ultimately calls the cloned allocation decorated with a
;; "cold" attribute.
; IR: define internal {{.*}} @_Z1Cv()
; IR:   call {{.*}} @_Z1Dv.memprof.1()
; IR: define internal {{.*}} @_Z1Bv()
; IR:   call {{.*}} @_Z1Dv.memprof.1()
; IR: define internal {{.*}} @_Z1Ev()
; IR:   call {{.*}} @_Z1Dv.memprof.1()
; IR: define internal {{.*}} @_Z1Dv.memprof.1()
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis


; DOTPRE: digraph "prestackupdate" {
; DOTPRE: 	label="prestackupdate";
; DOTPRE: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
; DOTPRE: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
; DOTPRE: 	Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
; DOTPRE: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
; DOTPRE: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
; DOTPRE: }


; DOTPOST:digraph "postbuild" {
; DOTPOST:	label="postbuild";
; DOTPOST:	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
; DOTPOST:	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
; DOTPOST:	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
; DOTPOST:	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
; DOTPOST:	Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
; DOTPOST:	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
; DOTPOST:	Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
; DOTPOST:}


; DOTCLONED: digraph "cloned" {
; DOTCLONED: 	label="cloned";
; DOTCLONED: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
; DOTCLONED: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
; DOTCLONED: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
; DOTCLONED: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
; DOTCLONED: 	Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
; DOTCLONED: 	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
; DOTCLONED: 	Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOTCLONED: 	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
; DOTCLONED: }