xref: /llvm-project/llvm/test/Transforms/MemProfContextDisambiguation/funcassigncloning.ll (revision 176889868024d98db032842bc47b416997d9e349)
1;; Test context disambiguation for a callgraph containing multiple memprof
2;; contexts and no inlining, where we need to perform additional cloning
3;; during function assignment/cloning to handle the combination of contexts
4;; to 2 different allocations.
5;;
6;; void E(char **buf1, char **buf2) {
7;;   *buf1 = new char[10];
8;;   *buf2 = new char[10];
9;; }
10;;
11;; void B(char **buf1, char **buf2) {
12;;   E(buf1, buf2);
13;; }
14;;
15;; void C(char **buf1, char **buf2) {
16;;   E(buf1, buf2);
17;; }
18;;
19;; void D(char **buf1, char **buf2) {
20;;   E(buf1, buf2);
21;; }
22;; int main(int argc, char **argv) {
23;;   char *cold1, *cold2, *default1, *default2, *default3, *default4;
24;;   B(&default1, &default2);
25;;   C(&default3, &cold1);
26;;   D(&cold2, &default4);
27;;   memset(cold1, 0, 10);
28;;   memset(cold2, 0, 10);
29;;   memset(default1, 0, 10);
30;;   memset(default2, 0, 10);
31;;   memset(default3, 0, 10);
32;;   memset(default4, 0, 10);
33;;   delete[] default1;
34;;   delete[] default2;
35;;   delete[] default3;
36;;   delete[] default4;
37;;   sleep(10);
38;;   delete[] cold1;
39;;   delete[] cold2;
40;;   return 0;
41;; }
42;;
43;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
44;; memory freed after sleep(10) results in cold lifetimes.
45;;
46;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
47
48;; -stats requires asserts
49; REQUIRES: asserts
50
51; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
52; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
53; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
54; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
55; RUN:  --check-prefix=STATS --check-prefix=REMARKS
56
57
58target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
59target triple = "x86_64-unknown-linux-gnu"
60
;; E: the allocating function. It calls @_Znam twice (the two `new char[10]`
;; statements in the source sketch at the top of this file). Each call has its
;; own !memprof profile (!0, !8) and its own !callsite id (!7, !15), so the two
;; allocations are profiled independently. In this reduced form the results are
;; not stored and %buf1/%buf2 are unused.
61define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) #0 {
62entry:
63  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !7
64  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !8, !callsite !15
65  ret void
66}
67
;; Allocation function called twice from E; these are the calls that carry the
;; !memprof profiles and that the expected output checks for memprof attributes.
68declare ptr @_Znam(i64) #1
69
;; B: forwards both arguments to E. Its !callsite id (!16) is the caller frame
;; of the contexts !6 (first allocation) and !10 (second allocation), both of
;; which are tagged "notcold".
70define internal void @_Z1BPPcS0_(ptr %0, ptr %1) {
71entry:
72  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !16
73  ret void
74}
75
76; Function Attrs: noinline
;; C: forwards both arguments to E. Its !callsite id (!17) is the caller frame
;; of context !4 (first allocation, tagged "notcold") and of context !12
;; (second allocation, tagged "cold").
77define internal void @_Z1CPPcS0_(ptr %0, ptr %1) #2 {
78entry:
79  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !17
80  ret void
81}
82
;; D: forwards both arguments to E. Its !callsite id (!18) is the caller frame
;; of context !2 (first allocation, tagged "cold") and of context !14
;; (second allocation, tagged "notcold").
83define internal void @_Z1DPPcS0_(ptr %0, ptr %1) #3 {
84entry:
85  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1), !callsite !18
86  ret void
87}
88
89; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
;; Neither @llvm.memset.p0.i64 nor @sleep is called by any function defined
;; above; only their declarations are present here.
90declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
91
92declare i32 @sleep() #5
93
94; uselistorder directives
95uselistorder ptr @_Znam, { 1, 0 }
96
;; Attribute groups. #6 (builtin) is the group on both @_Znam calls in E; the
;; expected output at the end of this file checks for builtin alone and for
;; builtin combined with a "memprof" string attribute.
97attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
98attributes #1 = { "no-trapping-math"="true" }
99attributes #2 = { noinline }
100attributes #3 = { "frame-pointer"="all" }
101attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
102attributes #5 = { "disable-tail-calls"="true" }
103attributes #6 = { builtin }
104
;; Profile of the first allocation in E (%call, !memprof !0). Each entry pairs
;; a call stack with an allocation type. In every stack the first id equals the
;; allocation's own !callsite (!7) and the second id equals the !callsite of
;; one of E's callers (!16 = B, !17 = C, !18 = D).
105!0 = !{!1, !3, !5}
;; First allocation reached via D: cold.
106!1 = !{!2, !"cold"}
107!2 = !{i64 -3461278137325233666, i64 -7799663586031895603}
;; First allocation reached via C: notcold.
108!3 = !{!4, !"notcold"}
109!4 = !{i64 -3461278137325233666, i64 -3483158674395044949}
;; First allocation reached via B: notcold.
110!5 = !{!6, !"notcold"}
111!6 = !{i64 -3461278137325233666, i64 -2441057035866683071}
;; Callsite id of the first allocation (%call).
112!7 = !{i64 -3461278137325233666}
;; Profile of the second allocation in E (%call1, !memprof !8); same layout,
;; with !15 as the allocation's own !callsite id.
113!8 = !{!9, !11, !13}
;; Second allocation reached via B: notcold.
114!9 = !{!10, !"notcold"}
115!10 = !{i64 -1415475215210681400, i64 -2441057035866683071}
;; Second allocation reached via C: cold.
116!11 = !{!12, !"cold"}
117!12 = !{i64 -1415475215210681400, i64 -3483158674395044949}
;; Second allocation reached via D: notcold.
118!13 = !{!14, !"notcold"}
119!14 = !{i64 -1415475215210681400, i64 -7799663586031895603}
;; Callsite id of the second allocation (%call1).
120!15 = !{i64 -1415475215210681400}
;; Callsite ids of the calls to E in B, C and D respectively.
121!16 = !{i64 -2441057035866683071}
122!17 = !{i64 -3483158674395044949}
123!18 = !{i64 -7799663586031895603}
124
125
126;; Originally we create a single clone of each call to new from E, since each
127;; allocates cold memory for a single caller.
128
129; DUMP: CCG after cloning:
130; DUMP: Callsite Context Graph:
131; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]]
132; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
133; DUMP: 	AllocTypes: NotCold
134; DUMP: 	ContextIds: 2 3
135; DUMP: 	CalleeEdges:
136; DUMP: 	CallerEdges:
137; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
138; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
139; DUMP: 	Clones: [[ENEW1CLONE:0x[a-z0-9]+]]
140
141; DUMP: Node [[D:0x[a-z0-9]+]]
142; DUMP:           call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1) (clone 0)
143; DUMP:         AllocTypes: NotColdCold
144; DUMP:         ContextIds: 1 6
145; DUMP:         CalleeEdges:
146; DUMP:                 Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
147; DUMP:                 Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
148; DUMP:         CallerEdges:
149
150; DUMP: Node [[C]]
151; DUMP: 	  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1)	(clone 0)
152; DUMP: 	AllocTypes: NotColdCold
153; DUMP: 	ContextIds: 2 5
154; DUMP: 	CalleeEdges:
155; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2
156; DUMP: 		Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
157; DUMP: 	CallerEdges:
158
159; DUMP: Node [[B]]
160; DUMP: 	  call void @_Z1EPPcS0_(ptr noundef %0, ptr noundef %1)	(clone 0)
161; DUMP: 	AllocTypes: NotCold
162; DUMP: 	ContextIds: 3 4
163; DUMP: 	CalleeEdges:
164; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3
165; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
166; DUMP: 	CallerEdges:
167
168; DUMP: Node [[ENEW2ORIG]]
169; DUMP: 	  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
170; DUMP: 	AllocTypes: NotCold
171; DUMP: 	ContextIds: 4 6
172; DUMP: 	CalleeEdges:
173; DUMP: 	CallerEdges:
174; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
175; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
176; DUMP: 	Clones: [[ENEW2CLONE]]
177
178; DUMP: Node [[ENEW1CLONE]]
179; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
180; DUMP: 	AllocTypes: Cold
181; DUMP: 	ContextIds: 1
182; DUMP: 	CalleeEdges:
183; DUMP: 	CallerEdges:
184; DUMP: 		Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
185; DUMP: 	Clone of [[ENEW1ORIG]]
186
187; DUMP: Node [[ENEW2CLONE]]
188; DUMP: 	  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
189; DUMP: 	AllocTypes: Cold
190; DUMP: 	ContextIds: 5
191; DUMP: 	CalleeEdges:
192; DUMP: 	CallerEdges:
193; DUMP: 		Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
194; DUMP: 	Clone of [[ENEW2ORIG]]
195
196
197;; We greedily create a clone of E that is initially used by the clones of the
198;; first call to new. However, we end up with an incompatible set of callers
199;; given the second call to new which has clones with a different combination of
200;; callers. Eventually, we create 2 more clones, and the first clone becomes dead.
201; REMARKS: created clone _Z1EPPcS0_.memprof.1
202; REMARKS: created clone _Z1EPPcS0_.memprof.2
203; REMARKS: created clone _Z1EPPcS0_.memprof.3
204; REMARKS: call in clone _Z1DPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.2
205; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute cold
206; REMARKS: call in clone _Z1CPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.3
207; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute notcold
208; REMARKS: call in clone _Z1BPPcS0_ assigned to call function clone _Z1EPPcS0_
209; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
210; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute notcold
211; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute cold
212; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
213
214
215;; Original version of E is used for the non-cold allocations, both from B.
216; IR: define internal {{.*}} @_Z1EPPcS0_(
217; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
218; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
219; IR: define internal {{.*}} @_Z1BPPcS0_(
220; IR:   call {{.*}} @_Z1EPPcS0_(
221;; C calls a clone of E with the first new allocating non-cold memory and the
222;; second allocating cold memory.
223; IR: define internal {{.*}} @_Z1CPPcS0_(
224; IR:   call {{.*}} @_Z1EPPcS0_.memprof.3(
225;; D calls a clone of E with the first new allocating cold memory and the
226;; second allocating non-cold memory.
227; IR: define internal {{.*}} @_Z1DPPcS0_(
228; IR:   call {{.*}} @_Z1EPPcS0_.memprof.2(
229;; Transient clone that will get removed as it ends up with no callers.
230;; Its calls to new never get updated with a memprof attribute as a result.
231; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.1(
232; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[DEFAULT:[0-9]+]]
233; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[DEFAULT]]
234; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2(
235; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
236; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
237; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3(
238; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
239; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
240; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
241; IR: attributes #[[DEFAULT]] = { builtin }
242; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
243
244
245; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
246; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
247; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
248