xref: /llvm-project/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll (revision bfe7205975a63a605ff3faacd97fe4c1bf4c19b3)
1;; Test callsite context graph generation for call graph with MIBs
2;; that have pruned contexts that partially match multiple inlined
3;; callsite contexts, requiring duplication of context ids and nodes
4;; while matching callsite nodes onto the graph. Also tests graph and IR
5;; cloning.
6;;
7;; Original code looks like:
8;;
9;; char *D() {
10;;   return new char[10];
11;; }
12;;
13;; char *F() {
14;;   return D();
15;; }
16;;
17;; char *C() {
18;;   return D();
19;; }
20;;
21;; char *B() {
22;;   return C();
23;; }
24;;
25;; char *E() {
26;;   return C();
27;; }
28;; int main(int argc, char **argv) {
29;;   char *x = B(); // cold
30;;   char *y = E(); // cold
31;;   char *z = F(); // default
32;;   memset(x, 0, 10);
33;;   memset(y, 0, 10);
34;;   memset(z, 0, 10);
35;;   delete[] z;
36;;   sleep(10);
37;;   delete[] x;
38;;   delete[] y;
39;;   return 0;
40;; }
41;;
42;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
43;; memory freed after sleep(10) results in cold lifetimes.
44;;
45;; The code below was created by forcing inlining of C into both B and E.
46;; Since both allocation contexts via C are cold, the matched memprof
47;; metadata has the context pruned above C's callsite. This requires
48;; matching the stack node for C to callsites where it was inlined (i.e.
49;; the callsites in B and E that have callsite metadata that includes C's).
50;; It also requires duplication of that node in the graph as well as the
51;; duplication of the context ids along that path through the graph,
52;; so that we can represent the duplicated (via inlining) C callsite.
53;;
54;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
55
56; RUN: opt -passes=memprof-context-disambiguation \
57; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
58; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
59; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
60; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
61; RUN:  --check-prefix=STATS --check-prefix=REMARKS
62
63; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
64; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
65;; We should clone D once for the cold allocations via C.
66; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
67
68target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
69target triple = "x86_64-unknown-linux-gnu"
70
;; D(): contains the profiled allocation. The call to @_Znam carries the
;; !memprof attachment !0 (one "cold" and one "notcold" context) and the
;; !callsite attachment !5, whose single stack id is the first frame of both
;; contexts.
71define internal ptr @_Z1Dv() {
72entry:
73  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5
;; Returns null rather than %call; presumably an artifact of the llvm-reduce
;; step mentioned in the header. The test only relies on the calls and their
;; metadata.
74  ret ptr null
75}
76
;; Allocation function called from @_Z1Dv (per the header, D does
;; `new char[10]`).
77declare ptr @_Znam(i64)
78
;; F(): calls D() directly. Its !callsite stack id (!6) is the second frame of
;; the "notcold" context (!4), so this caller is on the not-cold path.
79define internal ptr @_Z1Fv() #0 {
80entry:
81  %call = call noundef ptr @_Z1Dv(), !callsite !6
82  ret ptr null
83}
84
;; C(): the out-of-line copy of C. Its !callsite stack id (!7) is the second
;; frame of the "cold" context (!2); the same id is also the leading entry of
;; the callsite metadata in @_Z1Bv and @_Z1Ev, where C was inlined.
85; Function Attrs: mustprogress noinline optnone uwtable
86define internal ptr @_Z1Cv() #1 {
87entry:
88  %call = call noundef ptr @_Z1Dv(), !callsite !7
89  ret ptr null
90}
91
;; B(): calls D() through an inlined copy of C (see the header). The
;; !callsite metadata (!8) therefore lists two stack ids: the id used by C's
;; call to D (same as !7) followed by a second id specific to this function.
92; Function Attrs: mustprogress noinline optnone uwtable
93define internal ptr @_Z1Bv() #1 {
94entry:
95  %call.i = call noundef ptr @_Z1Dv(), !callsite !8
96  ret ptr null
97}
98
;; E(): like @_Z1Bv, calls D() through an inlined copy of C. Its !callsite
;; metadata (!9) starts with the same stack id as !7 and !8 and ends with a
;; second id specific to this function.
99; Function Attrs: mustprogress noinline optnone uwtable
100define internal ptr @_Z1Ev() #1 {
101entry:
102  %call.i = call noundef ptr @_Z1Dv(), !callsite !9
103  ret ptr null
104}
105
;; The declarations below are not called by any function defined in this
;; file. They appear to be what remains of main(), memset, delete[] and
;; sleep from the original program after reduction.
106; Function Attrs: noinline
107declare i32 @main() #2
108
109; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
110declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
111
112; Function Attrs: nounwind
113declare void @_ZdaPv() #4
114
115declare i32 @sleep() #5
116
;; Attribute groups: #0 is on @_Z1Fv; #1 is shared by @_Z1Cv, @_Z1Bv and
;; @_Z1Ev; #2 through #5 are on the bare declarations of main, memset,
;; _ZdaPv and sleep; #6 (builtin) is applied at the @_Znam call site in
;; @_Z1Dv and is the group the output checks expect to gain the "memprof"
;; attribute.
117attributes #0 = { "disable-tail-calls"="true" }
118attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
119attributes #2 = { noinline }
120attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
121attributes #4 = { nounwind }
122attributes #5 = { "no-trapping-math"="true" }
123attributes #6 = { builtin }
124
;; !0 is the !memprof attachment on the allocation call in @_Z1Dv: a list of
;; two entries (!1 and !3), each pairing a call stack with an allocation type.
125!0 = !{!1, !3}
;; Cold context: stack !2 is the allocation's own frame followed by the stack
;; id used by the call to D in C (!7). It stops there, so it carries no frame
;; that distinguishes B from E.
126!1 = !{!2, !"cold"}
127!2 = !{i64 6541423618768552252, i64 -6270142974039008131}
;; Not-cold context: stack !4 is the allocation's own frame followed by the
;; stack id used by the call to D in F (!6).
128!3 = !{!4, !"notcold"}
129!4 = !{i64 6541423618768552252, i64 -4903163940066524832}
;; !callsite attachments: !5 is on the allocation call, !6 on the call in F,
;; !7 on the call in C, !8 on the call in B and !9 on the call in E. !8 and !9
;; start with the same stack id as !7 and add one more id each.
130!5 = !{i64 6541423618768552252}
131!6 = !{i64 -4903163940066524832}
132!7 = !{i64 -6270142974039008131}
133!8 = !{i64 -6270142974039008131, i64 -184525619819294889}
134!9 = !{i64 -6270142974039008131, i64 1905834578520680781}
135
136
137;; After adding only the alloc node memprof metadata, we only have 2 contexts.
138
139; DUMP: CCG before updating call stack chains:
140; DUMP: Callsite Context Graph:
141; DUMP: Node [[D:0x[a-z0-9]+]]
142; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
143; DUMP: 	AllocTypes: NotColdCold
144; DUMP: 	ContextIds: 1 2
145; DUMP: 	CalleeEdges:
146; DUMP: 	CallerEdges:
147; DUMP: 		Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
148; DUMP: 		Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
149
150; DUMP: Node [[C]]
151; DUMP: 	null Call
152; DUMP: 	AllocTypes: Cold
153; DUMP: 	ContextIds: 1
154; DUMP: 	CalleeEdges:
155; DUMP: 		Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
156; DUMP: 	CallerEdges:
157
158; DUMP: Node [[F]]
159; DUMP: 	null Call
160; DUMP: 	AllocTypes: NotCold
161; DUMP: 	ContextIds: 2
162; DUMP: 	CalleeEdges:
163; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
164; DUMP: 	CallerEdges:
165
166;; After updating for callsite metadata, we should have generated context ids 3 and 4,
167;; along with 2 new nodes for those callsites. All have the same allocation type
168;; behavior as the original C node.
169
170; DUMP: CCG before cloning:
171; DUMP: Callsite Context Graph:
172; DUMP: Node [[D]]
173; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
174; DUMP: 	AllocTypes: NotColdCold
175; DUMP: 	ContextIds: 1 2 3 4
176; DUMP: 	CalleeEdges:
177; DUMP: 	CallerEdges:
178; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
179; DUMP: 		Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
180; DUMP: 		Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
181; DUMP: 		Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
182
183; DUMP: Node [[F]]
184; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
185; DUMP: 	AllocTypes: NotCold
186; DUMP: 	ContextIds: 2
187; DUMP: 	CalleeEdges:
188; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
189; DUMP: 	CallerEdges:
190
191; DUMP: Node [[C2]]
192; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
193; DUMP: 	AllocTypes: Cold
194; DUMP: 	ContextIds: 3
195; DUMP: 	CalleeEdges:
196; DUMP: 		Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
197; DUMP: 	CallerEdges:
198
199; DUMP: Node [[B]]
200; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
201; DUMP: 	AllocTypes: Cold
202; DUMP: 	ContextIds: 4
203; DUMP: 	CalleeEdges:
204; DUMP: 		Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
205; DUMP: 	CallerEdges:
206
207; DUMP: Node [[E]]
208; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
209; DUMP: 	AllocTypes: Cold
210; DUMP: 	ContextIds: 1
211; DUMP: 	CalleeEdges:
212; DUMP: 		Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
213; DUMP: 	CallerEdges:
214
215; DUMP: CCG after cloning:
216; DUMP: Callsite Context Graph:
217; DUMP: Node [[D]]
218; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
219; DUMP: 	AllocTypes: NotCold
220; DUMP: 	ContextIds: 2
221; DUMP: 	CalleeEdges:
222; DUMP: 	CallerEdges:
223; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
224; DUMP:         Clones: [[D2:0x[a-z0-9]+]]
225
226; DUMP: Node [[F]]
227; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
228; DUMP: 	AllocTypes: NotCold
229; DUMP: 	ContextIds: 2
230; DUMP: 	CalleeEdges:
231; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
232; DUMP: 	CallerEdges:
233
234; DUMP: Node [[C2]]
235; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
236; DUMP: 	AllocTypes: Cold
237; DUMP: 	ContextIds: 3
238; DUMP: 	CalleeEdges:
239; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
240; DUMP: 	CallerEdges:
241
242; DUMP: Node [[B]]
243; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
244; DUMP: 	AllocTypes: Cold
245; DUMP: 	ContextIds: 4
246; DUMP: 	CalleeEdges:
247; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
248; DUMP: 	CallerEdges:
249
250; DUMP: Node [[E]]
251; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
252; DUMP: 	AllocTypes: Cold
253; DUMP: 	ContextIds: 1
254; DUMP: 	CalleeEdges:
255; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
256; DUMP: 	CallerEdges:
257
258; DUMP: Node [[D2]]
259; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
260; DUMP: 	AllocTypes: Cold
261; DUMP: 	ContextIds: 1 3 4
262; DUMP: 	CalleeEdges:
263; DUMP: 	CallerEdges:
;; E, C2 and B were bound when those nodes were first printed; reuse the
;; bindings here so these edges are checked against the same nodes instead of
;; accepting any address.
264; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
265; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
266; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
267; DUMP:         Clone of [[D]]
268
269; REMARKS: created clone _Z1Dv.memprof.1
270; REMARKS: call in clone _Z1Ev assigned to call function clone _Z1Dv.memprof.1
271; REMARKS: call in clone _Z1Cv assigned to call function clone _Z1Dv.memprof.1
272; REMARKS: call in clone _Z1Bv assigned to call function clone _Z1Dv.memprof.1
273; REMARKS: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
274; REMARKS: call in clone _Z1Fv assigned to call function clone _Z1Dv
275; REMARKS: call in clone _Z1Dv marked with memprof allocation attribute notcold
276
277
278;; The allocation via F does not allocate cold memory. It should call the
279;; original D, which ultimately calls the original allocation decorated
280;; with a "notcold" attribute.
281; IR: define internal {{.*}} @_Z1Dv()
282; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
283; IR: define internal {{.*}} @_Z1Fv()
284; IR:   call {{.*}} @_Z1Dv()
285;; The allocations via B and E allocate cold memory. They should call the
286;; cloned D, which ultimately calls the cloned allocation decorated with a
287;; "cold" attribute.
288; IR: define internal {{.*}} @_Z1Bv()
289; IR:   call {{.*}} @_Z1Dv.memprof.1()
290; IR: define internal {{.*}} @_Z1Ev()
291; IR:   call {{.*}} @_Z1Dv.memprof.1()
292; IR: define internal {{.*}} @_Z1Dv.memprof.1()
293; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
294; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
295; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
296
297
298; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
299; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
300; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
301
302
303; DOTPRE: digraph "prestackupdate" {
304; DOTPRE: 	label="prestackupdate";
305; DOTPRE: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
306; DOTPRE: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
307; DOTPRE: 	Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
308; DOTPRE: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
309; DOTPRE: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
310; DOTPRE: }
311
312
313; DOTPOST:digraph "postbuild" {
314; DOTPOST:	label="postbuild";
315; DOTPOST:	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
316; DOTPOST:	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
317; DOTPOST:	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
318; DOTPOST:	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
319; DOTPOST:	Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
320; DOTPOST:	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
321; DOTPOST:	Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
322; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
323; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
324; DOTPOST:}
325
326
327; DOTCLONED: digraph "cloned" {
328; DOTCLONED: 	label="cloned";
329; DOTCLONED: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
330; DOTCLONED: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
331; DOTCLONED: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
332; DOTCLONED: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
333; DOTCLONED: 	Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
334; DOTCLONED: 	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
335; DOTCLONED: 	Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
336; DOTCLONED: 	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
337; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
338; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
339; DOTCLONED: }
340