;; This test ensures that the logic which assigns calls to stack nodes
;; correctly handles an inlined callsite with stack ids that partially
;; overlap with a trimmed context. In particular, it covers the case where
;; the callsite also partially overlaps with a longer non-trimmed context
;; that doesn't match all of the inlined callsite stack ids.

;; The profile data and call stacks were all manually added, but the code
;; would be structured something like the following (fairly contrived to
;; result in the type of control flow needed to test):

;; void A(bool b) {
;;   if (b)
;;     // cold: stack ids 6, 2, 8 (trimmed ids 9, 11)
;;     // not cold: stack ids 6, 7 (trimmed ids 10)
;;     new char[10]; // stack id 6
;;   else
;;     // not cold: stack ids 1, 2, 8, 3, 4
;;     // cold: stack ids 1, 2, 8, 3, 5
;;     new char[10]; // stack id 1
;; }
;;
;; void XZ() {
;;   A(false); // stack ids 2, 8 (e.g. X inlined into Z)
;; }
;;
;; void XZN() {
;;   // This is the tricky one to get right. We want to ensure it gets
;;   // correctly correlated with a stack node for the trimmed 6, 2, 8
;;   // context shown in A. It should *not* be correlated with the longer
;;   // untrimmed 1, 2, 8, 3, 4|5 contexts.
;;   A(true); // stack ids 2, 8, 9 (e.g. X inlined into Z inlined into N)
;; }
;;
;; void Y() {
;;   A(true); // stack id 7
;; }
;;
;; void M() {
;;   XZ(); // stack id 3
;; }
;;
;; int main() {
;;   M(); // stack id 4 (leads to not cold allocation)
;;   M(); // stack id 5 (leads to cold allocation)
;;   XZN(); // stack id 11 (leads to cold allocation)
;;   Y(); // stack id 10 (leads to not cold allocation)
;; }

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
; RUN:   -memprof-verify-ccg -memprof-verify-nodes \
; RUN:   -stats -pass-remarks=memprof-context-disambiguation \
; RUN:   %s -S 2>&1 | FileCheck %s --check-prefix=IR \
; RUN:   --check-prefix=STATS --check-prefix=REMARKS

; REMARKS: created clone _Z1Ab.memprof.1
; REMARKS: created clone _Z2XZv.memprof.1
; REMARKS: created clone _Z1Mv.memprof.1
;; Make sure the inlined context in _Z3XZNv, which partially overlaps a
;; trimmed cold context, and also partially overlaps completely
;; unrelated contexts, correctly calls a cloned version of _Z1Ab,
;; which will call the cold annotated allocation.
; REMARKS: call in clone _Z3XZNv assigned to call function clone _Z1Ab.memprof.1
; REMARKS: call in clone main assigned to call function clone _Z1Mv.memprof.1
; REMARKS: call in clone _Z1Mv.memprof.1 assigned to call function clone _Z2XZv.memprof.1
; REMARKS: call in clone _Z2XZv.memprof.1 assigned to call function clone _Z1Ab
; REMARKS: call in clone main assigned to call function clone _Z1Mv
; REMARKS: call in clone _Z1Mv assigned to call function clone _Z2XZv
; REMARKS: call in clone _Z2XZv assigned to call function clone _Z1Ab.memprof.1
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1Yv assigned to call function clone _Z1Ab
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z1Ab marked with memprof allocation attribute cold
; REMARKS: call in clone _Z1Ab.memprof.1 marked with memprof allocation attribute notcold


target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; A(bool): holds both profiled allocation sites, one per branch.
;;   if.then: stack id 6, carrying the two trimmed contexts listed in !5.
;;   if.else: stack id 1, carrying the two full-length contexts listed in !0.
define dso_local void @_Z1Ab(i1 noundef zeroext %b) {
entry:
  br i1 %b, label %if.then, label %if.else

if.then:
  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !5, !callsite !11
  br label %if.end

if.else:
  %call2 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !10
  br label %if.end

if.end:
  ret void
}

; Function Attrs: nobuiltin
declare ptr @_Znam(i64) #0

;; XZ(): calls A(false) through a callsite with stack ids 2, 8, which appear
;; consecutively in both full-length contexts (!2 and !4).
define dso_local void @_Z2XZv() local_unnamed_addr #0 {
entry:
  tail call void @_Z1Ab(i1 noundef zeroext false), !callsite !12
  ret void
}

;; M(): calls XZ() at stack id 3.
define dso_local void @_Z1Mv() local_unnamed_addr #0 {
entry:
  tail call void @_Z2XZv(), !callsite !19
  ret void
}

;; XZN(): calls A(true) through a callsite with stack ids 2, 8, 9. Ids 2, 8
;; are the tail of the trimmed cold context (!7) and also occur inside the
;; longer contexts (!2 and !4); id 9 is not part of any profiled context.
define dso_local void @_Z3XZNv() local_unnamed_addr {
entry:
  tail call void @_Z1Ab(i1 noundef zeroext true), !callsite !15
  ret void
}

;; Y(): calls A(true) at stack id 7, the last id of the trimmed not cold
;; context (!9).
define dso_local void @_Z1Yv() local_unnamed_addr {
entry:
  tail call void @_Z1Ab(i1 noundef zeroext true), !callsite !17
  ret void
}

;; main(): one call per profiled context. The two calls to M() differ only in
;; their stack id (4 vs. 5), which is what separates the not cold and cold
;; full-length contexts.
define dso_local noundef i32 @main() local_unnamed_addr {
entry:
  tail call void @_Z1Mv(), !callsite !13 ;; Not cold context
  tail call void @_Z1Mv(), !callsite !14 ;; Cold context
  tail call void @_Z3XZNv(), !callsite !16 ;; Cold context
  tail call void @_Z1Yv(), !callsite !18 ;; Not cold context
  ret i32 0
}

attributes #0 = { nobuiltin }
attributes #7 = { builtin }

;; Memprof contexts attached to the if.else allocation in _Z1Ab.
!0 = !{!1, !3}
;; Not cold context via first call to _Z1Mv in main
!1 = !{!2, !"notcold"}
!2 = !{i64 1, i64 2, i64 8, i64 3, i64 4}
;; Cold context via second call to _Z1Mv in main
!3 = !{!4, !"cold"}
!4 = !{i64 1, i64 2, i64 8, i64 3, i64 5}
;; Memprof contexts attached to the if.then allocation in _Z1Ab.
!5 = !{!6, !8}
;; Cold (trimmed) context via call to _Z3XZNv in main
!6 = !{!7, !"cold"}
!7 = !{i64 6, i64 2, i64 8}
;; Not cold (trimmed) context via call to _Z1Yv in main
!8 = !{!9, !"notcold"}
!9 = !{i64 6, i64 7}
;; Callsite stack ids referenced by the calls above.
!10 = !{i64 1}
!11 = !{i64 6}
!12 = !{i64 2, i64 8}
!13 = !{i64 4}
!14 = !{i64 5}
;; Inlined context in _Z3XZNv, which includes part of trimmed cold context
!15 = !{i64 2, i64 8, i64 9}
!16 = !{i64 11}
!17 = !{i64 7}
!18 = !{i64 10}
!19 = !{i64 3}

; IR: define {{.*}} @_Z1Ab(i1 noundef zeroext %b)
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
; IR: define {{.*}} @_Z2XZv()
; IR:   call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext false)
; IR: define {{.*}} @_Z1Mv()
; IR:   call {{.*}} @_Z2XZv()
;; Make sure the inlined context in _Z3XZNv, which partially overlaps a
;; trimmed cold context, and also partially overlaps completely
;; unrelated contexts, correctly calls the cloned version of _Z1Ab
;; that will call the cold annotated allocation.
; IR: define {{.*}} @_Z3XZNv()
; IR:   call {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext true)
; IR: define {{.*}} @_Z1Yv()
; IR:   call {{.*}} @_Z1Ab(i1 noundef zeroext true)
; IR: define {{.*}} @main()
; IR:   call {{.*}} @_Z1Mv()
; IR:   call {{.*}} @_Z1Mv.memprof.1()
; IR:   call {{.*}} @_Z3XZNv()
; IR:   call {{.*}} @_Z1Yv()
; IR: define {{.*}} @_Z1Ab.memprof.1(i1 noundef zeroext %b)
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: define {{.*}} @_Z2XZv.memprof.1()
; IR:   call {{.*}} @_Z1Ab(i1 noundef zeroext false)
; IR: define {{.*}} @_Z1Mv.memprof.1()
; IR:   call {{.*}} @_Z2XZv.memprof.1()

; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }

; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis