;; Test callsite context graph generation for call graph with two memprof
;; contexts and partial inlining, requiring generation of a new fused node to
;; represent the inlined sequence while matching callsite nodes onto the graph.
;; Also tests graph and IR cloning.
;;
;; Original code looks like:
;;
;; char *bar() {
;;   return new char[10];
;; }
;;
;; char *baz() {
;;   return bar();
;; }
;;
;; char *foo() {
;;   return baz();
;; }
;;
;; int main(int argc, char **argv) {
;;   char *x = foo();
;;   char *y = foo();
;;   memset(x, 0, 10);
;;   memset(y, 0, 10);
;;   delete[] x;
;;   sleep(10);
;;   delete[] y;
;;   return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The code below was created by forcing inlining of baz into foo, and
;; bar into baz. Due to the inlining of bar we will initially have two
;; allocation nodes in the graph. This tests that we correctly match
;; foo (with baz inlined) onto the graph nodes first, and generate a new
;; fused node for it. We should then not match baz (with bar inlined) as that
;; is not reached by the MIB contexts (since all calls from main will look
;; like main -> foo(+baz) -> bar after the inlining reflected in this IR).
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
; RUN:  --check-prefix=STATS --check-prefix=REMARKS

; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
;; We should create clones for foo and bar for the call from main to allocate
;; cold memory.
; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; bar: the original allocation site. Its callsite metadata (!5) holds only the
;; first stack id of the profiled contexts.
define internal ptr @_Z3barv() {
entry:
  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
  ret ptr null
}

; Function Attrs: nobuiltin
declare ptr @_Znam(i64) #0

;; baz: carries its own copy of the allocation (bar was inlined into it), so
;; its callsite metadata (!6) holds the first two stack ids. Nothing in this
;; module calls baz.
; Function Attrs: mustprogress
define internal ptr @_Z3bazv() #1 {
entry:
  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6
  ret ptr null
}

;; foo: calls bar directly because baz was inlined here; the callsite metadata
;; on that call (!7) holds the second and third stack ids of the profiled
;; contexts.
; Function Attrs: noinline
define internal ptr @_Z3foov() #2 {
entry:
  %call.i = call noundef ptr @_Z3barv(), !callsite !7
  ret ptr null
}

;; main: the two calls to foo carry the distinct last stack ids (!8 and !9)
;; that tell the notcold and cold contexts apart.
define i32 @main() #3 {
entry:
  %call = call noundef ptr @_Z3foov(), !callsite !8
  %call1 = call noundef ptr @_Z3foov(), !callsite !9
  ret i32 0
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4

; Function Attrs: nounwind
declare void @_ZdaPv() #5

declare i32 @sleep() #6

attributes #0 = { nobuiltin }
attributes #1 = { mustprogress }
attributes #2 = { noinline }
attributes #3 = { "tune-cpu"="generic" }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { nounwind }
attributes #6 = { "disable-tail-calls"="true" }
attributes #7 = { builtin }

;; !0 is the memprof metadata attached to both allocation calls: a notcold
;; context (!1, stack !2) and a cold context (!3, stack !4). The two stacks
;; share their first three ids and differ only in the last one.
!0 = !{!1, !3}
!1 = !{!2, !"notcold"}
!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
!3 = !{!4, !"cold"}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!5 = !{i64 9086428284934609951}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432}
!7 = !{i64 -5964873800580613432, i64 2732490490862098848}
!8 = !{i64 8632435727821051414}
!9 = !{i64 -3421689549917153178}


; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[BAR:0x[a-z0-9]+]]
; DUMP:   %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2

;; This is leftover from the MIB on the alloc inlined into baz. It is not
;; matched with any call, since there is no such node in the IR. Due to the
;; null call it will not participate in any context transformations.
; DUMP: Node [[FOO2:0x[a-z0-9]+]]
; DUMP:   null Call
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 3 4
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
; DUMP:     Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4

;; First call to foo in main: it only appears in the notcold contexts.
; DUMP: Node [[MAIN1]]
; DUMP:   %call = call noundef ptr @_Z3foov() (clone 0)
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 1 3
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
; DUMP:     Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP:   CallerEdges:

;; Second call to foo in main: it only appears in the cold contexts.
; DUMP: Node [[MAIN2]]
; DUMP:   %call1 = call noundef ptr @_Z3foov() (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 2 4
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
; DUMP:     Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP:   CallerEdges:

;; Allocation node for the copy of the allocation that lives in baz.
; DUMP: Node [[BAZ]]
; DUMP:   %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 3 4
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4

;; This is the node synthesized for the call to bar in foo that was created
;; by inlining baz into foo.
; DUMP: Node [[FOO]]
; DUMP:   %call.i = call noundef ptr @_Z3barv() (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP:     Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2

;; After cloning, context 2 (cold) is moved onto clones of the bar allocation
;; node and of the foo callsite node, leaving the originals with context 1
;; (notcold) only. The nodes holding contexts 3 and 4 are left untouched.
; DUMP: CCG after cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[BAR]]
; DUMP:   %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 1
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
; DUMP:   Clones: [[BAR2:0x[a-z0-9]+]]

; DUMP: Node [[FOO2]]
; DUMP:   null Call
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 3 4
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
; DUMP:     Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4

; DUMP: Node [[MAIN1]]
; DUMP:   %call = call noundef ptr @_Z3foov() (clone 0)
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 1 3
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
; DUMP:     Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP:   CallerEdges:

; DUMP: Node [[MAIN2]]
; DUMP:   %call1 = call noundef ptr @_Z3foov() (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 2 4
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
; DUMP:     Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP:   CallerEdges:

; DUMP: Node [[BAZ]]
; DUMP:   %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 3 4
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4

; DUMP: Node [[FOO]]
; DUMP:   %call.i = call noundef ptr @_Z3barv() (clone 0)
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 1
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP:   Clones: [[FOO3]]

; DUMP: Node [[FOO3]]
; DUMP:   %call.i = call noundef ptr @_Z3barv() (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 2
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP:   Clone of [[FOO]]

; DUMP: Node [[BAR2]]
; DUMP:   %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 2
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2
; DUMP:   Clone of [[BAR]]


;; Optimization remarks describing the function clones that were created and
;; how each call was assigned to them.
; REMARKS: created clone _Z3barv.memprof.1
; REMARKS: created clone _Z3foov.memprof.1
; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3barv.memprof.1
; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
; REMARKS: call in clone main assigned to call function clone _Z3foov
; REMARKS: call in clone _Z3foov assigned to call function clone _Z3barv
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z3bazv marked with memprof allocation attribute notcold


; IR: define internal {{.*}} @_Z3barv()
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3foov()
; IR:   call {{.*}} @_Z3barv()
; IR: define {{.*}} @main()
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR:   call {{.*}} @_Z3foov()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR:   call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR:   call {{.*}} @_Z3barv.memprof.1()
; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }


;; Expected statistics: one cold allocation (in the bar clone), two notcold
;; allocations (original bar and baz) and two function clones (bar and foo),
;; matching the remarks checked above.
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS: 2 memprof-context-disambiguation - Number of function clones created during whole program analysis


;; Checks for the exported postbuild dot file. These bind their own pattern
;; variables: here FOO is the null-call node and FOO2 is the foo -> bar
;; callsite node, which is the reverse of the naming used for the graph dump.
; DOT: digraph "postbuild" {
; DOT:   label="postbuild";
; DOT:   Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
; DOT:   Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
; DOT:   Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
; DOT:   Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
; DOT:   Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
; DOT:   Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOT:   Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
; DOT:   Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOT:   Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOT:   Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
; DOT:   Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
; DOT:   Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
; DOT: }


;; Checks for the exported dot file written after cloning.
; DOTCLONED: digraph "cloned" {
; DOTCLONED:   label="cloned";
; DOTCLONED:   Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
; DOTCLONED:   Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
; DOTCLONED:   Node[[FOO2]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
; DOTCLONED:   Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
; DOTCLONED:   Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 3",fillcolor="brown1"];
; DOTCLONED:   Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOTCLONED:   Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
; DOTCLONED:   Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOTCLONED:   Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOTCLONED:   Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
; DOTCLONED:   Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
; DOTCLONED:   Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOTCLONED:   Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
; DOTCLONED:   Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOTCLONED:   Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
; DOTCLONED: }