1;; Test context disambiguation for a callgraph containing multiple memprof 2;; contexts and no inlining, where we need to perform additional cloning 3;; during function assignment/cloning to handle the combination of contexts 4;; to 2 different allocations. 5;; 6;; void E(char **buf1, char **buf2) { 7;; *buf1 = new char[10]; 8;; *buf2 = new char[10]; 9;; } 10;; 11;; void B(char **buf1, char **buf2) { 12;; E(buf1, buf2); 13;; } 14;; 15;; void C(char **buf1, char **buf2) { 16;; E(buf1, buf2); 17;; } 18;; 19;; void D(char **buf1, char **buf2) { 20;; E(buf1, buf2); 21;; } 22;; int main(int argc, char **argv) { 23;; char *cold1, *cold2, *default1, *default2, *default3, *default4; 24;; B(&default1, &default2); 25;; C(&default3, &cold1); 26;; D(&cold2, &default4); 27;; memset(cold1, 0, 10); 28;; memset(cold2, 0, 10); 29;; memset(default1, 0, 10); 30;; memset(default2, 0, 10); 31;; memset(default3, 0, 10); 32;; memset(default4, 0, 10); 33;; delete[] default1; 34;; delete[] default2; 35;; delete[] default3; 36;; delete[] default4; 37;; sleep(10); 38;; delete[] cold1; 39;; delete[] cold2; 40;; return 0; 41;; } 42;; 43;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the 44;; memory freed after sleep(10) results in cold lifetimes. 45;; 46;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
;; -stats requires asserts
; REQUIRES: asserts


;; Build a ThinLTO bitcode file from this module, then run an in-process
;; ThinLTO link with context disambiguation enabled. The combined output is
;; checked for the graph dump, the statistics and the optimization remarks.
; RUN: opt -thinlto-bc %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN:  -supports-hot-cold-new \
; RUN:  -r=%t.o,main,plx \
; RUN:  -r=%t.o,_ZdaPv, \
; RUN:  -r=%t.o,sleep, \
; RUN:  -r=%t.o,_Znam, \
; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN:  --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS

;; Disassemble the optimized bitcode kept by -save-temps above and check the
;; cloned functions and their allocation attributes.
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR


;; Try again but with distributed ThinLTO
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN:  -supports-hot-cold-new \
; RUN:  -thinlto-distributed-indexes \
; RUN:  -r=%t.o,main,plx \
; RUN:  -r=%t.o,_ZdaPv, \
; RUN:  -r=%t.o,sleep, \
; RUN:  -r=%t.o,_Znam, \
; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
; RUN:  --check-prefix=STATS

;; Run ThinLTO backend
; RUN: opt -passes=memprof-context-disambiguation \
; RUN:  -memprof-import-summary=%t.o.thinlto.bc \
; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
; RUN:  %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \
; RUN:  --check-prefix=STATS-BE --check-prefix=REMARKS


source_filename = "funcassigncloning.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; E after reduction: only the two calls to _Znam (operator new[]) remain, and
;; %buf1 / %buf2 are no longer used. Each call carries its own !memprof context
;; list (!0 and !8) and its own !callsite stack id (!7 and !15). The order of
;; the two calls matters: the uselistorder directive later in the module and
;; the per-clone attribute checks both refer to "first" and "second" call.
; Function Attrs: noinline optnone
define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) #0 {
entry:
  %call = call ptr @_Znam(i64 noundef 10), !memprof !0, !callsite !7
  %call1 = call ptr @_Znam(i64 noundef 10), !memprof !8, !callsite !15
  ret void
}

declare ptr @_Znam(i64)

;; B, C and D are each reduced to a single call to E. They differ only in the
;; !callsite stack id attached to that call (!16, !17 and !18 respectively),
;; which is what the !memprof contexts on the allocations in E refer to.
define internal void @_Z1BPPcS0_() {
entry:
  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !16
  ret void
}

define internal void @_Z1CPPcS0_() {
entry:
  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !17
  ret void
}

define internal void @_Z1DPPcS0_() {
entry:
  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !18
  ret void
}

;; main calls B, C and D once each; these calls carry no !callsite metadata.
; Function Attrs: noinline optnone
define i32 @main() #0 {
entry:
  call void @_Z1BPPcS0_()
  call void @_Z1CPPcS0_()
  call void @_Z1DPPcS0_()
  ret i32 0
}

declare void @_ZdaPv()

declare i32 @sleep()

; uselistorder directives
uselistorder ptr @_Znam, { 1, 0 }

attributes #0 = { noinline optnone }

;; Contexts for the first call to new in E (%call, callsite id !7).
!0 = !{!1, !3, !5}
;; Cold when reached through the call in D (second stack id matches !18).
!1 = !{!2, !"cold"}
!2 = !{i64 -3461278137325233666, i64 -7799663586031895603}
;; Not cold when reached through the call in C (second stack id matches !17).
!3 = !{!4, !"notcold"}
!4 = !{i64 -3461278137325233666, i64 -3483158674395044949}
;; Not cold when reached through the call in B (second stack id matches !16).
!5 = !{!6, !"notcold"}
!6 = !{i64 -3461278137325233666, i64 -2441057035866683071}
!7 = !{i64 -3461278137325233666}
;; Contexts for the second call to new in E (%call1, callsite id !15).
!8 = !{!9, !11, !13}
;; Not cold when reached through the call in B (second stack id matches !16).
!9 = !{!10, !"notcold"}
!10 = !{i64 -1415475215210681400, i64 -2441057035866683071}
;; Cold when reached through the call in C (second stack id matches !17).
!11 = !{!12, !"cold"}
!12 = !{i64 -1415475215210681400, i64 -3483158674395044949}
;; Not cold when reached through the call in D (second stack id matches !18).
!13 = !{!14, !"notcold"}
!14 = !{i64 -1415475215210681400, i64 -7799663586031895603}
!15 = !{i64 -1415475215210681400}
;; Callsite stack ids for the calls to E in B, C and D respectively.
!16 = !{i64 -2441057035866683071}
!17 = !{i64 -3483158674395044949}
!18 = !{i64 -7799663586031895603}


;; Originally we create a single clone of each call to new from E, since each
;; allocates cold memory for a single caller.
161 162; DUMP: CCG after cloning: 163; DUMP: Callsite Context Graph: 164; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]] 165; DUMP: Versions: 1 MIB: 166; DUMP: AllocType 2 StackIds: 0 167; DUMP: AllocType 1 StackIds: 1 168; DUMP: AllocType 1 StackIds: 2 169; DUMP: (clone 0) 170; DUMP: AllocTypes: NotCold 171; DUMP: ContextIds: 2 3 172; DUMP: CalleeEdges: 173; DUMP: CallerEdges: 174; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 175; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 176; DUMP: Clones: [[ENEW1CLONE:0x[a-z0-9]+]] 177 178; DUMP: Node [[D:0x[a-z0-9]+]] 179; DUMP: Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 0 (clone 0) 180; DUMP: AllocTypes: NotColdCold 181; DUMP: ContextIds: 1 6 182; DUMP: CalleeEdges: 183; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1 184; DUMP: Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6 185; DUMP: CallerEdges: 186 187; DUMP: Node [[C]] 188; DUMP: Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 1 (clone 0) 189; DUMP: AllocTypes: NotColdCold 190; DUMP: ContextIds: 2 5 191; DUMP: CalleeEdges: 192; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2 193; DUMP: Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5 194; DUMP: CallerEdges: 195 196; DUMP: Node [[B]] 197; DUMP: Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 2 (clone 0) 198; DUMP: AllocTypes: NotCold 199; DUMP: ContextIds: 3 4 200; DUMP: CalleeEdges: 201; DUMP: Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3 202; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4 203; DUMP: CallerEdges: 204 205; DUMP: Node [[ENEW2ORIG]] 206; DUMP: Versions: 1 MIB: 207; DUMP: AllocType 1 StackIds: 2 208; DUMP: AllocType 2 
StackIds: 1 209; DUMP: AllocType 1 StackIds: 0 210; DUMP: (clone 0) 211; DUMP: AllocTypes: NotCold 212; DUMP: ContextIds: 4 6 213; DUMP: CalleeEdges: 214; DUMP: CallerEdges: 215; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4 216; DUMP: Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6 217; DUMP: Clones: [[ENEW2CLONE]] 218 219; DUMP: Node [[ENEW1CLONE]] 220; DUMP: Versions: 1 MIB: 221; DUMP: AllocType 2 StackIds: 0 222; DUMP: AllocType 1 StackIds: 1 223; DUMP: AllocType 1 StackIds: 2 224; DUMP: (clone 0) 225; DUMP: AllocTypes: Cold 226; DUMP: ContextIds: 1 227; DUMP: CalleeEdges: 228; DUMP: CallerEdges: 229; DUMP: Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1 230; DUMP: Clone of [[ENEW1ORIG]] 231 232; DUMP: Node [[ENEW2CLONE]] 233; DUMP: Versions: 1 MIB: 234; DUMP: AllocType 1 StackIds: 2 235; DUMP: AllocType 2 StackIds: 1 236; DUMP: AllocType 1 StackIds: 0 237; DUMP: (clone 0) 238; DUMP: AllocTypes: Cold 239; DUMP: ContextIds: 5 240; DUMP: CalleeEdges: 241; DUMP: CallerEdges: 242; DUMP: Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5 243; DUMP: Clone of [[ENEW2ORIG]] 244 245 246;; We greedily create a clone of E that is initially used by the clones of the 247;; first call to new. However, we end up with an incompatible set of callers 248;; given the second call to new which has clones with a different combination of 249;; callers. Eventually, we create 2 more clones, and the first clone becomes dead. 
; REMARKS: created clone _Z1EPPcS0_.memprof.1
; REMARKS: created clone _Z1EPPcS0_.memprof.2
; REMARKS: created clone _Z1EPPcS0_.memprof.3
; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1CPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.3
; REMARKS: call in clone _Z1DPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.2


;; Original version of E is used for the non-cold allocations, both from B.
; IR: define internal {{.*}} @_Z1EPPcS0_(
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: define internal {{.*}} @_Z1BPPcS0_(
; IR:   call {{.*}} @_Z1EPPcS0_(
;; C calls a clone of E with the first new allocating non-cold memory and the
;; second allocating cold memory.
; IR: define internal {{.*}} @_Z1CPPcS0_(
; IR:   call {{.*}} @_Z1EPPcS0_.memprof.3(
;; D calls a clone of E with the first new allocating cold memory and the
;; second allocating non-cold memory.
275; IR: define internal {{.*}} @_Z1DPPcS0_( 276; IR: call {{.*}} @_Z1EPPcS0_.memprof.2( 277; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2( 278; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]] 279; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]] 280; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3( 281; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]] 282; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]] 283; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } 284; IR: attributes #[[COLD]] = { "memprof"="cold" } 285 286 287; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) 288; STATS-BE: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend 289; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) 290; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend 291; STATS-BE: 8 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend 292; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis 293; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend 294; STATS-BE: 1 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend 295; STATS-BE: 4 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend 296; STATS-BE: 2 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend 297