;; Test callsite context graph generation for call graph with MIBs
;; that have pruned contexts that partially match multiple inlined
;; callsite contexts, requiring duplication of context ids and nodes
;; while matching callsite nodes onto the graph. This test requires more
;; complex duplication due to multiple contexts for different allocations
;; that share some of the same callsite nodes.
;;
;; Original code looks like:
;;
;; char *D(bool Call1) {
;;   if (Call1)
;;     return new char[10];
;;   else
;;     return new char[10];
;; }
;;
;; char *C(bool Call1) {
;;   return D(Call1);
;; }
;;
;; char *B(bool Call1) {
;;   if (Call1)
;;     return C(true);
;;   else
;;     return C(false);
;; }
;;
;; char *A(bool Call1) {
;;   return B(Call1);
;; }
;;
;; char *A1() {
;;   return A(true);
;; }
;;
;; char *A2() {
;;   return A(true);
;; }
;;
;; char *A3() {
;;   return A(false);
;; }
;;
;; char *A4() {
;;   return A(false);
;; }
;;
;; char *E() {
;;   return B(true);
;; }
;;
;; char *F() {
;;   return B(false);
;; }
;;
;; int main(int argc, char **argv) {
;;   char *a1 = A1(); // cold
;;   char *a2 = A2(); // cold
;;   char *e = E(); // default
;;   char *a3 = A3(); // default
;;   char *a4 = A4(); // default
;;   char *f = F(); // cold
;;   memset(a1, 0, 10);
;;   memset(a2, 0, 10);
;;   memset(e, 0, 10);
;;   memset(a3, 0, 10);
;;   memset(a4, 0, 10);
;;   memset(f, 0, 10);
;;   delete[] a3;
;;   delete[] a4;
;;   delete[] e;
;;   sleep(10);
;;   delete[] a1;
;;   delete[] a2;
;;   delete[] f;
;;   return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The code below was created by forcing inlining of A into its callers,
;; without any other inlining or optimizations. Since both allocation contexts
;; via A for each allocation in D have the same allocation type (cold via
;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
;; new in D), the contexts for those respective allocations are pruned above A.
;; The allocations via E and F are to ensure we don't prune above B.
;;
;; The matching onto the inlined A[1234]->A sequences will require duplication
;; of the context id assigned to the context from A for each allocation in D.
;; This test ensures that we do this correctly in the presence of callsites
;; shared by the different duplicated context ids (i.e. callsite in C).
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.

; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; D: both operator new[] calls carry !memprof metadata (!0 and !6), each with
;; one notcold and one cold MIB.
; Function Attrs: mustprogress noinline uwtable
define ptr @_Z1Db(i1 %Call1) #0 {
entry:
  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
  br label %return

if.else:                                          ; No predecessors!
  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11
  br label %return

return:                                           ; preds = %if.else, %entry
  ret ptr null
}

; Function Attrs: nobuiltin
declare ptr @_Znam(i64) #1

;; C: a single callsite into D; its stack id (!12) appears in the MIB stacks of
;; both allocations (!2, !4, !8 and !10).
define ptr @_Z1Cb(i1 %Call1) {
entry:
  %tobool = trunc i8 0 to i1
  %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12
  ret ptr null
}

;; B: two callsites into C. Stack id !13 appears only in the MIB stacks of the
;; first allocation (!2, !4), and !14 only in those of the second (!8, !10).
; Function Attrs: mustprogress noinline uwtable
define ptr @_Z1Bb(i1 %Call1) #0 {
entry:
  %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13
  br label %return

if.else:                                          ; No predecessors!
  %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14
  br label %return

return:                                           ; preds = %if.else, %entry
  ret ptr null
}

;; A: the out-of-line copy of the callsite into B. Its stack id (!15) is the
;; last frame of the MIB stacks !4 and !8.
define ptr @_Z1Ab(i1 %tobool) #2 {
entry:
  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15
  ret ptr null
}

;; A1-A4: A has been inlined into each of these, so every !callsite below has
;; two frames: A's stack id followed by an id unique to that caller (!16-!19).
; Function Attrs: mustprogress noinline uwtable
define ptr @_Z2A1v(i1 %tobool.i) #0 {
entry:
  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16
  ret ptr null
}

; Function Attrs: mustprogress noinline uwtable
define ptr @_Z2A2v(i1 %tobool.i) #0 {
entry:
  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17
  ret ptr null
}

; Function Attrs: mustprogress noinline uwtable
define ptr @_Z2A3v(i1 %tobool.i) #0 {
entry:
  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18
  ret ptr null
}

; Function Attrs: mustprogress noinline uwtable
define ptr @_Z2A4v(i1 %tobool.i) #0 {
entry:
  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19
  ret ptr null
}

;; E and F call B directly. Stack id !20 is the last frame of the notcold MIB
;; of the first allocation (!2); !21 is the last frame of the cold MIB of the
;; second allocation (!10).
; Function Attrs: mustprogress noinline uwtable
define ptr @_Z1Ev() #0 {
entry:
  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20
  ret ptr null
}

; Function Attrs: mustprogress noinline uwtable
define ptr @_Z1Fv() #0 {
entry:
  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21
  ret ptr null
}

; Function Attrs: noinline
declare i32 @main() #3

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4

declare void @_ZdaPv() #5

declare i32 @sleep() #6

; uselistorder directives
uselistorder ptr @_Znam, { 1, 0 }

attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nobuiltin }
attributes #2 = { "tune-cpu"="generic" }
attributes #3 = { noinline }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
attributes #6 = { "disable-tail-calls"="true" }
attributes #7 = { builtin allocsize(0) }

!0 = !{!1, !3}
!1 = !{!2, !"notcold"}
!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781}
!3 = !{!4, !"cold"}
!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978}
!5 = !{i64 4854880825882961848}
!6 = !{!7, !9}
!7 = !{!8, !"notcold"}
!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978}
!9 = !{!10, !"cold"}
!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832}
!11 = !{i64 -8775068539491628272}
!12 = !{i64 -904694911315397047}
!13 = !{i64 6532298921261778285}
!14 = !{i64 7859682663773658275}
!15 = !{i64 -6528110295079665978}
!16 = !{i64 -6528110295079665978, i64 5747919905719679568}
!17 = !{i64 -6528110295079665978, i64 -5753238080028016843}
!18 = !{i64 -6528110295079665978, i64 1794685869326395337}
!19 = !{i64 -6528110295079665978, i64 5462047985461644151}
!20 = !{i64 1905834578520680781}
!21 = !{i64 -4903163940066524832}


;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only
;; match the interesting parts of the pre-update graph here).

; DUMP: CCG before updating call stack chains:
; DUMP: Callsite Context Graph:

; DUMP: Node [[D1:0x[a-z0-9]+]]
; DUMP:   %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2

; DUMP: Node [[C:0x[a-z0-9]+]]
; DUMP:   null Call
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2 3 4
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP:     Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4

; DUMP: Node [[D2]]
; DUMP:   %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 3 4


;; After updating for callsite metadata, we should have duplicated the context
;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
;; and used those on new nodes for those callers. Note that while in reality
;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
;; due to the pruning we have lost this information and thus end up duplicating
;; both of A's contexts to all of the new nodes (which could result in some
;; unnecessary cloning).

; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D1]]
; DUMP:   %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2 5 7 9 11
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11

; DUMP: Node [[C]]
; DUMP:   %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool) (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP:     Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP:     Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12

; DUMP: Node [[B1]]
; DUMP:   %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true) (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 1 2 5 7 9 11
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
; DUMP:     Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
; DUMP:     Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
; DUMP:     Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
; DUMP:     Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
; DUMP:     Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2

; DUMP: Node [[E]]
; DUMP:   %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true) (clone 0)
; DUMP:   AllocTypes: NotCold
; DUMP:   ContextIds: 1
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
; DUMP:   CallerEdges:

; DUMP: Node [[D2]]
; DUMP:   %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 3 4 6 8 10 12
; DUMP:   CalleeEdges:
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12

; DUMP: Node [[B2]]
; DUMP:   %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false) (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 3 4 6 8 10 12
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
; DUMP:   CallerEdges:
; DUMP:     Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP:     Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
; DUMP:     Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
; DUMP:     Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
; DUMP:     Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
; DUMP:     Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3

; DUMP: Node [[F]]
; DUMP:   %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false) (clone 0)
; DUMP:   AllocTypes: Cold
; DUMP:   ContextIds: 4
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4
; DUMP:   CallerEdges:

;; NOTE: the A* FileCheck variable names below are only labels. The four
;; inlined %call.i callsites (in @_Z2A1v .. @_Z2A4v) print identically in the
;; dump, and the node captured as A2 is the one whose call passes %tobool,
;; i.e. the callsite in @_Z1Ab itself rather than one of the inlined copies.

; DUMP: Node [[A2]]
; DUMP:   %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool) (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 5 6
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5
; DUMP:     Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
; DUMP:   CallerEdges:

; DUMP: Node [[A3]]
; DUMP:   %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 7 8
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7
; DUMP:     Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
; DUMP:   CallerEdges:

; DUMP: Node [[A1]]
; DUMP:   %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 9 10
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9
; DUMP:     Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
; DUMP:   CallerEdges:

; DUMP: Node [[A4]]
; DUMP:   %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 11 12
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11
; DUMP:     Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
; DUMP:   CallerEdges:

; DUMP: Node [[A]]
; DUMP:   %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
; DUMP:   AllocTypes: NotColdCold
; DUMP:   ContextIds: 2 3
; DUMP:   CalleeEdges:
; DUMP:     Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2
; DUMP:     Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
; DUMP:   CallerEdges: