1;; Test context disambiguation for a callgraph containing multiple memprof 2;; contexts and no inlining, where we need to perform additional cloning 3;; during function assignment/cloning to handle the combination of contexts 4;; to 2 different allocations. 5;; 6;; void E(char **buf1, char **buf2) { 7;; *buf1 = new char[10]; 8;; *buf2 = new char[10]; 9;; } 10;; 11;; void B(char **buf1, char **buf2) { 12;; E(buf1, buf2); 13;; } 14;; 15;; void C(char **buf1, char **buf2) { 16;; E(buf1, buf2); 17;; } 18;; 19;; void D(char **buf1, char **buf2) { 20;; E(buf1, buf2); 21;; } 22;; int main(int argc, char **argv) { 23;; char *cold1, *cold2, *default1, *default2, *default3, *default4; 24;; B(&default1, &default2); 25;; C(&default3, &cold1); 26;; D(&cold2, &default4); 27;; memset(cold1, 0, 10); 28;; memset(cold2, 0, 10); 29;; memset(default1, 0, 10); 30;; memset(default2, 0, 10); 31;; memset(default3, 0, 10); 32;; memset(default4, 0, 10); 33;; delete[] default1; 34;; delete[] default2; 35;; delete[] default3; 36;; delete[] default4; 37;; sleep(10); 38;; delete[] cold1; 39;; delete[] cold2; 40;; return 0; 41;; } 42;; 43;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the 44;; memory freed after sleep(10) results in cold lifetimes. 45;; 46;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
47 48;; -stats requires asserts 49; REQUIRES: asserts 50 51; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ 52; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ 53; RUN: -stats -pass-remarks=memprof-context-disambiguation \ 54; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \ 55; RUN: --check-prefix=STATS --check-prefix=REMARKS 56 57 58target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" 59target triple = "x86_64-unknown-linux-gnu" 60 ;; E makes the two profiled operator new[] calls; each one carries its own !memprof contexts and !callsite id. 61define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) #0 { 62entry: 63 %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !7 64 %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !8, !callsite !15 65 ret void 66} 67 68declare ptr @_Znam(i64) #1 69 ;; B, C and D each make a single call to E, tagged with a distinct !callsite id (!16, !17 and !18). 70define internal void @_Z1BPPcS0_(ptr %0, ptr %1) { 71entry: 72 call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !16 73 ret void 74} 75 76; Function Attrs: noinline 77define internal void @_Z1CPPcS0_(ptr %0, ptr %1) #2 { 78entry: 79 call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !17 80 ret void 81} 82 83define internal void @_Z1DPPcS0_(ptr %0, ptr %1) #3 { 84entry: 85 call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !18 86 ret void 87} 88 89; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) 90declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 91 92declare i32 @sleep() #5 93 94; uselistorder directives 95uselistorder ptr @_Znam, { 1, 0 } 96 97attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } 98attributes #1 = { "no-trapping-math"="true" } 99attributes #2 = { noinline } 100attributes #3 = { "frame-pointer"="all" } 101attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } 102attributes #5 = { "disable-tail-calls"="true" } 103attributes #6 = { builtin } 104 ;; !0 and !8 hold the contexts of the first and second allocation in E; the second stack id of each context equals the !callsite id of B (!16), C (!17) or D (!18). 105!0 = !{!1, 
!3, !5} 106!1 = !{!2, !"cold"} 107!2 = !{i64 -3461278137325233666, i64 -7799663586031895603} 108!3 = !{!4, !"notcold"} 109!4 = !{i64 -3461278137325233666, i64 -3483158674395044949} 110!5 = !{!6, !"notcold"} 111!6 = !{i64 -3461278137325233666, i64 -2441057035866683071} 112!7 = !{i64 -3461278137325233666} 113!8 = !{!9, !11, !13} 114!9 = !{!10, !"notcold"} 115!10 = !{i64 -1415475215210681400, i64 -2441057035866683071} 116!11 = !{!12, !"cold"} 117!12 = !{i64 -1415475215210681400, i64 -3483158674395044949} 118!13 = !{!14, !"notcold"} 119!14 = !{i64 -1415475215210681400, i64 -7799663586031895603} 120!15 = !{i64 -1415475215210681400} 121!16 = !{i64 -2441057035866683071} 122!17 = !{i64 -3483158674395044949} 123!18 = !{i64 -7799663586031895603} 124 125 126;; Originally we create a single clone of each call to new from E, since each 127;; allocates cold memory for a single caller. 128 129; DUMP: CCG after cloning: 130; DUMP: Callsite Context Graph: 131; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]] 132; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) 133; DUMP: AllocTypes: NotCold 134; DUMP: ContextIds: 2 3 135; DUMP: CalleeEdges: 136; DUMP: CallerEdges: 137; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 138; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 139; DUMP: Clones: [[ENEW1CLONE:0x[a-z0-9]+]] 140 141; DUMP: Node [[D:0x[a-z0-9]+]] 142; DUMP: call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1) (clone 0) 143; DUMP: AllocTypes: NotColdCold 144; DUMP: ContextIds: 1 6 145; DUMP: CalleeEdges: 146; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1 147; DUMP: Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6 148; DUMP: CallerEdges: 149 150; DUMP: Node [[C]] 151; DUMP: call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1) (clone 0) 152; DUMP: AllocTypes: NotColdCold 
153; DUMP: ContextIds: 2 5 154; DUMP: CalleeEdges: 155; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2 156; DUMP: Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5 157; DUMP: CallerEdges: 158 159; DUMP: Node [[B]] 160; DUMP: call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1) (clone 0) 161; DUMP: AllocTypes: NotCold 162; DUMP: ContextIds: 3 4 163; DUMP: CalleeEdges: 164; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3 165; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4 166; DUMP: CallerEdges: 167 168; DUMP: Node [[ENEW2ORIG]] 169; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) 170; DUMP: AllocTypes: NotCold 171; DUMP: ContextIds: 4 6 172; DUMP: CalleeEdges: 173; DUMP: CallerEdges: 174; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4 175; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6 176; DUMP: Clones: [[ENEW2CLONE]] 177 178; DUMP: Node [[ENEW1CLONE]] 179; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) 180; DUMP: AllocTypes: Cold 181; DUMP: ContextIds: 1 182; DUMP: CalleeEdges: 183; DUMP: CallerEdges: 184; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1 185; DUMP: Clone of [[ENEW1ORIG]] 186 187; DUMP: Node [[ENEW2CLONE]] 188; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) 189; DUMP: AllocTypes: Cold 190; DUMP: ContextIds: 5 191; DUMP: CalleeEdges: 192; DUMP: CallerEdges: 193; DUMP: Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5 194; DUMP: Clone of [[ENEW2ORIG]] 195 196 197;; We greedily create a clone of E that is initially used by the clones of the 198;; first call to new. 
However, we end up with an incompatible set of callers 199;; given the second call to new which has clones with a different combination of 200;; callers. Eventually, we create 2 more clones, and the first clone becomes dead. 201; REMARKS: created clone _Z1EPPcS0_.memprof.1 202; REMARKS: created clone _Z1EPPcS0_.memprof.2 203; REMARKS: created clone _Z1EPPcS0_.memprof.3 204; REMARKS: call in clone _Z1DPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.2 205; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute cold 206; REMARKS: call in clone _Z1CPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.3 207; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute notcold 208; REMARKS: call in clone _Z1BPPcS0_ assigned to call function clone _Z1EPPcS0_ 209; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold 210; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute notcold 211; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute cold 212; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold 213 214 215;; Original version of E is used for the non-cold allocations, both from B. 216; IR: define internal {{.*}} @_Z1EPPcS0_( 217; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]] 218; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]] 219; IR: define internal {{.*}} @_Z1BPPcS0_( 220; IR: call {{.*}} @_Z1EPPcS0_( 221;; C calls a clone of E with the first new allocating non-cold memory and the 222;; second allocating cold memory. 223; IR: define internal {{.*}} @_Z1CPPcS0_( 224; IR: call {{.*}} @_Z1EPPcS0_.memprof.3( 225;; D calls a clone of E with the first new allocating cold memory and the 226;; second allocating non-cold memory. 
227; IR: define internal {{.*}} @_Z1DPPcS0_( 228; IR: call {{.*}} @_Z1EPPcS0_.memprof.2( 229;; Transient clone that will get removed as it ends up with no callers. 230;; Its calls to new never get updated with a memprof attribute as a result. 231; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.1( 232; IR: call {{.*}} @_Znam(i64 noundef 10) #[[DEFAULT:[0-9]+]] 233; IR: call {{.*}} @_Znam(i64 noundef 10) #[[DEFAULT]] 234; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2( 235; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]] 236; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]] 237; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3( 238; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]] 239; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]] 240; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" } 241; IR: attributes #[[DEFAULT]] = { builtin } 242; IR: attributes #[[COLD]] = { builtin "memprof"="cold" } 243 244 245; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) 246; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) 247; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis 248