;; Test callsite context graph generation for call graph with MIBs
;; that have pruned contexts that partially match multiple inlined
;; callsite contexts, requiring duplication of context ids and nodes
;; while matching callsite nodes onto the graph. This test requires more
;; complex duplication due to multiple contexts for different allocations
;; that share some of the same callsite nodes.
;;
;; Original code looks like:
;;
;; char *D(bool Call1) {
;;   if (Call1)
;;     return new char[10];
;;   else
;;     return new char[10];
;; }
;;
;; char *C(bool Call1) {
;;   return D(Call1);
;; }
;;
;; char *B(bool Call1) {
;;   if (Call1)
;;     return C(true);
;;   else
;;     return C(false);
;; }
;;
;; char *A(bool Call1) {
;;   return B(Call1);
;; }
;;
;; char *A1() {
;;   return A(true);
;; }
;;
;; char *A2() {
;;   return A(true);
;; }
;;
;; char *A3() {
;;   return A(false);
;; }
;;
;; char *A4() {
;;   return A(false);
;; }
;;
;; char *E() {
;;   return B(true);
;; }
;;
;; char *F() {
;;   return B(false);
;; }
;;
;; int main(int argc, char **argv) {
;;   char *a1 = A1(); // cold
;;   char *a2 = A2(); // cold
;;   char *e = E(); // default
;;   char *a3 = A3(); // default
;;   char *a4 = A4(); // default
;;   char *f = F(); // cold
;;   memset(a1, 0, 10);
;;   memset(a2, 0, 10);
;;   memset(e, 0, 10);
;;   memset(a3, 0, 10);
;;   memset(a4, 0, 10);
;;   memset(f, 0, 10);
;;   delete[] a3;
;;   delete[] a4;
;;   delete[] e;
;;   sleep(10);
;;   delete[] a1;
;;   delete[] a2;
;;   delete[] f;
;;   return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The code below was created by forcing inlining of A into its callers,
;; without any other inlining or optimizations.
;; Since both allocation contexts
;; via A for each allocation in D have the same allocation type (cold via
;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
;; new in D), the contexts for those respective allocations are pruned above A.
;; The allocations via E and F are to ensure we don't prune above B.
;;
;; The matching onto the inlined A[1234]->A sequences will require duplication
;; of the context id assigned to the context from A for each allocation in D.
;; This test ensures that we do this correctly in the presence of callsites
;; shared by the different duplicated context ids (i.e. callsite in C).
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.

; RUN: opt -thinlto-bc %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN:  -supports-hot-cold-new \
; RUN:  -r=%t.o,main,plx \
; RUN:  -r=%t.o,_Z1Db,plx \
; RUN:  -r=%t.o,_Z1Cb,plx \
; RUN:  -r=%t.o,_Z1Bb,plx \
; RUN:  -r=%t.o,_Z1Ab,plx \
; RUN:  -r=%t.o,_Z2A1v,plx \
; RUN:  -r=%t.o,_Z2A2v,plx \
; RUN:  -r=%t.o,_Z2A3v,plx \
; RUN:  -r=%t.o,_Z2A4v,plx \
; RUN:  -r=%t.o,_Z1Ev,plx \
; RUN:  -r=%t.o,_Z1Fv,plx \
; RUN:  -r=%t.o,_ZdaPv, \
; RUN:  -r=%t.o,sleep, \
; RUN:  -r=%t.o,_Znam, \
; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; D: holds the two allocation calls (to _Znam). Each call carries its own
;; !memprof profile (!0 and !6) and a single-entry !callsite (!5 and !11).
define ptr @_Z1Db(i1 %Call1) {
entry:
  %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
  br label %return

if.else:                                          ; No predecessors!
  %call1 = call ptr @_Znam(i64 0), !memprof !6, !callsite !11
  br label %return

return:                                           ; preds = %if.else, %entry
  ret ptr null
}

declare ptr @_Znam(i64)

;; C: the single call to D; its stack id (!12) appears in the MIB stacks of
;; both allocations (!2, !4, !8 and !10).
define ptr @_Z1Cb(i1 %Call1) {
entry:
  %call = call ptr @_Z1Db(i1 false), !callsite !12
  ret ptr null
}

;; B: two distinct calls to C. Stack id !13 only appears in the MIBs of the
;; first allocation (!2, !4) and !14 only in those of the second (!8, !10).
define ptr @_Z1Bb(i1 %Call1) {
entry:
  %call = call ptr @_Z1Cb(i1 false), !callsite !13
  br label %return

if.else:                                          ; No predecessors!
  %call1 = call ptr @_Z1Cb(i1 false), !callsite !14
  br label %return

return:                                           ; preds = %if.else, %entry
  ret ptr null
}

;; A: out-of-line copy, with a single-entry !callsite (!15).
define ptr @_Z1Ab() {
entry:
  %call = call ptr @_Z1Bb(i1 false), !callsite !15
  ret ptr null
}

;; A1-A4: the call to B has a two-entry !callsite whose first stack id equals
;; the one in !15 (A's call to B), followed by an id unique to each caller.
define ptr @_Z2A1v() {
entry:
  %call.i = call ptr @_Z1Bb(i1 false), !callsite !16
  ret ptr null
}

define ptr @_Z2A2v() {
entry:
  %call.i = call ptr @_Z1Bb(i1 false), !callsite !17
  ret ptr null
}

define ptr @_Z2A3v() {
entry:
  %call.i = call ptr @_Z1Bb(i1 false), !callsite !18
  ret ptr null
}

define ptr @_Z2A4v() {
entry:
  %call.i = call ptr @_Z1Bb(i1 false), !callsite !19
  ret ptr null
}

;; E and F: direct callers of B; their stack ids (!20, !21) terminate the
;; first MIB of the first allocation (!2) and the second MIB of the second
;; allocation (!10) respectively.
define ptr @_Z1Ev() {
entry:
  %call = call ptr @_Z1Bb(i1 false), !callsite !20
  ret ptr null
}

define ptr @_Z1Fv() {
entry:
  %call = call ptr @_Z1Bb(i1 false), !callsite !21
  ret ptr null
}

declare i32 @main()

declare void @_ZdaPv()

declare i32 @sleep()

; uselistorder directives
uselistorder ptr @_Znam, { 1, 0 }

;; !0 and !6 are the !memprof MIB lists for the two allocations in D. Each MIB
;; (!1, !3, !7, !9) pairs a list of stack ids with "notcold" or "cold".
;; !5 and !11-!21 are the !callsite stack id lists referenced by the calls above.
!0 = !{!1, !3}
!1 = !{!2, !"notcold"}
!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781}
!3 = !{!4, !"cold"}
!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978}
!5 = !{i64 4854880825882961848}
!6 = !{!7, !9}
!7 = !{!8, !"notcold"}
!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978}
!9 = !{!10, !"cold"}
!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832}
!11 = !{i64 -8775068539491628272}
!12 = !{i64 -904694911315397047}
!13 = !{i64 6532298921261778285}
!14 = !{i64 7859682663773658275}
!15 = !{i64 -6528110295079665978}
!16 = !{i64 -6528110295079665978, i64 5747919905719679568}
!17 = !{i64 -6528110295079665978, i64 -5753238080028016843}
!18 = !{i64 -6528110295079665978, i64 1794685869326395337}
!19 = !{i64 -6528110295079665978, i64 5462047985461644151}
!20 = !{i64 1905834578520680781}
!21 = !{i64 -4903163940066524832}


;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only
;; match the interesting parts of the pre-update graph here).

; DUMP: CCG before updating call stack chains:
; DUMP: Callsite Context Graph:

; DUMP: Node [[D1:0x[a-z0-9]+]]
; DUMP: 	Versions: 1 MIB:
; DUMP: 		AllocType 1 StackIds: 0, 1, 2
; DUMP: 		AllocType 2 StackIds: 0, 1, 3
; DUMP: 	(clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 1 2

; DUMP: Node [[C:0x[a-z0-9]+]]
; DUMP: 	null Call
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 1 2 3 4
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: 		Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4

; DUMP: Node [[D2]]
; DUMP: 	Versions: 1 MIB:
; DUMP: 		AllocType 1 StackIds: 0, 4, 3
; DUMP: 		AllocType 2 StackIds: 0, 4, 5
; DUMP: 	(clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 3 4


;; After updating for callsite metadata, we should have duplicated the context
;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
;; and used those on new nodes for those callers.
;; Note that while in reality
;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
;; due to the pruning we have lost this information and thus end up duplicating
;; both of A's contexts to all of the new nodes (which could result in some
;; unnecessary cloning).
;;
;; In the checks below, context ids 5-12 are the duplicates: each of the four
;; new caller nodes gets one copy of id 2 (cold) and one copy of id 3 (notcold).

; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D1]]
; DUMP: 	Versions: 1 MIB:
; DUMP: 		AllocType 1 StackIds: 0, 1, 2
; DUMP: 		AllocType 2 StackIds: 0, 1, 3
; DUMP: 	(clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 1 2 5 7 9 11
; DUMP: 	CalleeEdges:
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11

; DUMP: Node [[C]]
; DUMP: 	Callee: 11485875876353461977 (_Z1Db) Clones: 0 StackIds: 0 (clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP: 		Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP: 		Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12

; DUMP: Node [[B1]]
; DUMP: 	Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 1 (clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 1 2 5 7 9 11
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2

; DUMP: Node [[E]]
; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 2 (clone 0)
; DUMP: 	AllocTypes: NotCold
; DUMP: 	ContextIds: 1
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
; DUMP: 	CallerEdges:

; DUMP: Node [[D2]]
; DUMP: 	Versions: 1 MIB:
; DUMP: 		AllocType 1 StackIds: 0, 4, 3
; DUMP: 		AllocType 2 StackIds: 0, 4, 5
; DUMP: 	(clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 3 4 6 8 10 12
; DUMP: 	CalleeEdges:
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12

; DUMP: Node [[B2]]
; DUMP: 	Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 4 (clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 3 4 6 8 10 12
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
; DUMP: 	CallerEdges:
; DUMP: 		Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3

; DUMP: Node [[F]]
; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 5 (clone 0)
; DUMP: 	AllocTypes: Cold
; DUMP: 	ContextIds: 4
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4
; DUMP: 	CallerEdges:

; DUMP: Node [[A2]]
; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 7 (clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 5 6
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
; DUMP: 	CallerEdges:

; DUMP: Node [[A3]]
; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 8 (clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 7 8
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
; DUMP: 	CallerEdges:

; DUMP: Node [[A1]]
; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3 (clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 9 10
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
; DUMP: 	CallerEdges:

; DUMP: Node [[A4]]
; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 9 (clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 11 12
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
; DUMP: 	CallerEdges:

; DUMP: Node [[A]]
; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 6 (clone 0)
; DUMP: 	AllocTypes: NotColdCold
; DUMP: 	ContextIds: 2 3
; DUMP: 	CalleeEdges:
; DUMP: 		Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2
; DUMP: 		Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
; DUMP: 	CallerEdges: