xref: /llvm-project/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll (revision 176889868024d98db032842bc47b416997d9e349)
1;; Test callsite context graph generation for call graph with MIBs
2;; that have pruned contexts that partially match multiple inlined
3;; callsite contexts, requiring duplication of context ids and nodes
4;; while matching callsite nodes onto the graph. Also tests graph and IR
5;; cloning.
6;;
7;; Original code looks like:
8;;
9;; char *D() {
10;;   return new char[10];
11;; }
12;;
13;; char *F() {
14;;   return D();
15;; }
16;;
17;; char *C() {
18;;   return D();
19;; }
20;;
21;; char *B() {
22;;   return C();
23;; }
24;;
25;; char *E() {
26;;   return C();
27;; }
28;; int main(int argc, char **argv) {
29;;   char *x = B(); // cold
30;;   char *y = E(); // cold
31;;   char *z = F(); // default
32;;   memset(x, 0, 10);
33;;   memset(y, 0, 10);
34;;   memset(z, 0, 10);
35;;   delete[] z;
36;;   sleep(10);
37;;   delete[] x;
38;;   delete[] y;
39;;   return 0;
40;; }
41;;
42;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
43;; memory freed after sleep(10) results in cold lifetimes.
44;;
45;; The code below was created by forcing inlining of C into both B and E.
46;; Since both allocation contexts via C are cold, the matched memprof
47;; metadata has the context pruned above C's callsite. This requires
48;; matching the stack node for C to callsites where it was inlined (i.e.
49;; the callsites in B and E that have callsite metadata that includes C's).
50;; It also requires duplication of that node in the graph as well as the
51;; duplication of the context ids along that path through the graph,
52;; so that we can represent the duplicated (via inlining) C callsite.
53;;
54;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
55
56;; -stats requires asserts
57; REQUIRES: asserts
58
59; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
60; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
61; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
62; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
63; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
64; RUN:  --check-prefix=STATS --check-prefix=REMARKS
65
66; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
67; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
68;; We should clone D once for the cold allocations via C.
69; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
70
71target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
72target triple = "x86_64-unknown-linux-gnu"
73
;; D(): contains the profiled allocation. The _Znam call carries !memprof !0
;; (one "cold" and one "notcold" context) and !callsite !5. The instruction
;; text is matched verbatim by the graph dump checks below, so keep the value
;; name and attribute group number unchanged.
74define internal ptr @_Z1Dv() {
75entry:
76  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5
77  ret ptr null
78}
79
80declare ptr @_Znam(i64)
81
;; F(): calls D() directly. The stack id in !callsite !6 is the second id of
;; the "notcold" context (!4), so this caller is expected to keep calling the
;; original, uncloned D.
82define internal ptr @_Z1Fv() #0 {
83entry:
84  %call = call noundef ptr @_Z1Dv(), !callsite !6
85  ret ptr null
86}
87
88; Function Attrs: mustprogress noinline optnone uwtable
;; C(): the out-of-line copy. Its call to D() carries !callsite !7, the single
;; stack id at which the pruned "cold" context (!2) ends. The same id also
;; starts the callsite metadata of the calls in B and E (!8, !9).
89define internal ptr @_Z1Cv() #1 {
90entry:
91  %call = call noundef ptr @_Z1Dv(), !callsite !7
92  ret ptr null
93}
94
95; Function Attrs: mustprogress noinline optnone uwtable
;; B(): per the header comment, C was force-inlined here, so the call to D()
;; carries a two-entry !callsite !8: C's stack id followed by a second id
;; (presumably B's original call to C). The value name %call.i is matched
;; verbatim by the graph dump checks below.
96define internal ptr @_Z1Bv() #1 {
97entry:
98  %call.i = call noundef ptr @_Z1Dv(), !callsite !8
99  ret ptr null
100}
101
102; Function Attrs: mustprogress noinline optnone uwtable
;; E(): per the header comment, C was force-inlined here as well, so the call
;; to D() carries a two-entry !callsite !9: C's stack id followed by a second
;; id (presumably E's original call to C). The value name %call.i is matched
;; verbatim by the graph dump checks below.
103define internal ptr @_Z1Ev() #1 {
104entry:
105  %call.i = call noundef ptr @_Z1Dv(), !callsite !9
106  ret ptr null
107}
108
109; Function Attrs: noinline
110declare i32 @main() #2
111
112; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
113declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
114
115; Function Attrs: nounwind
116declare void @_ZdaPv() #4
117
118declare i32 @sleep() #5
119
120attributes #0 = { "disable-tail-calls"="true" }
121attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
122attributes #2 = { noinline }
123attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
124attributes #4 = { nounwind }
125attributes #5 = { "no-trapping-math"="true" }
126attributes #6 = { builtin }
127
;; !0 is the !memprof attachment on the _Znam call in D(): two profiled contexts.
128!0 = !{!1, !3}
;; "cold" context: the allocation's own stack id (same as !5), then the id of
;; C's call to D (same as !7). The context stops there; see the header comment
;; about pruning above C's callsite.
129!1 = !{!2, !"cold"}
130!2 = !{i64 6541423618768552252, i64 -6270142974039008131}
;; "notcold" context: the allocation's own stack id, then the id of F's call
;; to D (same as !6).
131!3 = !{!4, !"notcold"}
132!4 = !{i64 6541423618768552252, i64 -4903163940066524832}
;; !callsite attachments, in order: the _Znam call in D, the call in F, the
;; call in C, the call in B (C's id plus one more frame), and the call in E
;; (C's id plus one more frame).
133!5 = !{i64 6541423618768552252}
134!6 = !{i64 -4903163940066524832}
135!7 = !{i64 -6270142974039008131}
136!8 = !{i64 -6270142974039008131, i64 -184525619819294889}
137!9 = !{i64 -6270142974039008131, i64 1905834578520680781}
138
139
140;; After adding only the alloc node memprof metadata, we only have 2 contexts.
141
142; DUMP: CCG before updating call stack chains:
143; DUMP: Callsite Context Graph:
144; DUMP: Node [[D:0x[a-z0-9]+]]
145; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
146; DUMP: 	AllocTypes: NotColdCold
147; DUMP: 	ContextIds: 1 2
148; DUMP: 	CalleeEdges:
149; DUMP: 	CallerEdges:
150; DUMP: 		Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
151; DUMP: 		Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
152
153; DUMP: Node [[C]]
154; DUMP: 	null Call
155; DUMP: 	AllocTypes: Cold
156; DUMP: 	ContextIds: 1
157; DUMP: 	CalleeEdges:
158; DUMP: 		Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
159; DUMP: 	CallerEdges:
160
161; DUMP: Node [[F]]
162; DUMP: 	null Call
163; DUMP: 	AllocTypes: NotCold
164; DUMP: 	ContextIds: 2
165; DUMP: 	CalleeEdges:
166; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
167; DUMP: 	CallerEdges:
168
169;; After updating for callsite metadata, we should have generated context ids 3 and 4,
170;; along with 2 new nodes for those callsites. All have the same allocation type
171;; behavior as the original C node.
172
173; DUMP: CCG before cloning:
174; DUMP: Callsite Context Graph:
175; DUMP: Node [[D]]
176; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
177; DUMP: 	AllocTypes: NotColdCold
178; DUMP: 	ContextIds: 1 2 3 4
179; DUMP: 	CalleeEdges:
180; DUMP: 	CallerEdges:
181; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
182; DUMP: 		Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
183; DUMP: 		Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
184; DUMP: 		Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
185
186; DUMP: Node [[F]]
187; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
188; DUMP: 	AllocTypes: NotCold
189; DUMP: 	ContextIds: 2
190; DUMP: 	CalleeEdges:
191; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
192; DUMP: 	CallerEdges:
193
194; DUMP: Node [[C2]]
195; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
196; DUMP: 	AllocTypes: Cold
197; DUMP: 	ContextIds: 3
198; DUMP: 	CalleeEdges:
199; DUMP: 		Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
200; DUMP: 	CallerEdges:
201
202; DUMP: Node [[B]]
203; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
204; DUMP: 	AllocTypes: Cold
205; DUMP: 	ContextIds: 4
206; DUMP: 	CalleeEdges:
207; DUMP: 		Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
208; DUMP: 	CallerEdges:
209
210; DUMP: Node [[E]]
211; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
212; DUMP: 	AllocTypes: Cold
213; DUMP: 	ContextIds: 1
214; DUMP: 	CalleeEdges:
215; DUMP: 		Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
216; DUMP: 	CallerEdges:
217
218; DUMP: CCG after cloning:
219; DUMP: Callsite Context Graph:
220; DUMP: Node [[D]]
221; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
222; DUMP: 	AllocTypes: NotCold
223; DUMP: 	ContextIds: 2
224; DUMP: 	CalleeEdges:
225; DUMP: 	CallerEdges:
226; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
227; DUMP:         Clones: [[D2:0x[a-z0-9]+]]
228
229; DUMP: Node [[F]]
230; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
231; DUMP: 	AllocTypes: NotCold
232; DUMP: 	ContextIds: 2
233; DUMP: 	CalleeEdges:
234; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
235; DUMP: 	CallerEdges:
236
237; DUMP: Node [[C2]]
238; DUMP: 	  %call = call noundef ptr @_Z1Dv()	(clone 0)
239; DUMP: 	AllocTypes: Cold
240; DUMP: 	ContextIds: 3
241; DUMP: 	CalleeEdges:
242; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
243; DUMP: 	CallerEdges:
244
245; DUMP: Node [[B]]
246; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
247; DUMP: 	AllocTypes: Cold
248; DUMP: 	ContextIds: 4
249; DUMP: 	CalleeEdges:
250; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
251; DUMP: 	CallerEdges:
252
253; DUMP: Node [[E]]
254; DUMP: 	  %call.i = call noundef ptr @_Z1Dv()	(clone 0)
255; DUMP: 	AllocTypes: Cold
256; DUMP: 	ContextIds: 1
257; DUMP: 	CalleeEdges:
258; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
259; DUMP: 	CallerEdges:
260
261; DUMP: Node [[D2]]
262; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6	(clone 0)
263; DUMP: 	AllocTypes: Cold
264; DUMP: 	ContextIds: 1 3 4
265; DUMP: 	CalleeEdges:
266; DUMP: 	CallerEdges:
;; Reuse the node variables captured earlier rather than re-capturing them, so
;; these lines verify that the clone's caller edges point at the same E, C2
;; and B nodes that were matched above.
267; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
268; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
269; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
270; DUMP:         Clone of [[D]]
271
272; REMARKS: created clone _Z1Dv.memprof.1
273; REMARKS: call in clone _Z1Ev assigned to call function clone _Z1Dv.memprof.1
274; REMARKS: call in clone _Z1Cv assigned to call function clone _Z1Dv.memprof.1
275; REMARKS: call in clone _Z1Bv assigned to call function clone _Z1Dv.memprof.1
276; REMARKS: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
277; REMARKS: call in clone _Z1Fv assigned to call function clone _Z1Dv
278; REMARKS: call in clone _Z1Dv marked with memprof allocation attribute notcold
279
280
281;; The allocation via F does not allocate cold memory. It should call the
282;; original D, which ultimately calls the original allocation decorated
283;; with a "notcold" attribute.
284; IR: define internal {{.*}} @_Z1Dv()
285; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
286; IR: define internal {{.*}} @_Z1Fv()
287; IR:   call {{.*}} @_Z1Dv()
288;; The allocations via B and E allocate cold memory. They should call the
289;; cloned D, which ultimately calls the cloned allocation decorated with a
290;; "cold" attribute.
291; IR: define internal {{.*}} @_Z1Bv()
292; IR:   call {{.*}} @_Z1Dv.memprof.1()
293; IR: define internal {{.*}} @_Z1Ev()
294; IR:   call {{.*}} @_Z1Dv.memprof.1()
295; IR: define internal {{.*}} @_Z1Dv.memprof.1()
296; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
297; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
298; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
299
300
301; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
302; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
303; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
304
305
306; DOTPRE: digraph "prestackupdate" {
307; DOTPRE: 	label="prestackupdate";
308; DOTPRE: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
309; DOTPRE: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
310; DOTPRE: 	Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
311; DOTPRE: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
312; DOTPRE: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
313; DOTPRE: }
314
315
316; DOTPOST:digraph "postbuild" {
317; DOTPOST:	label="postbuild";
318; DOTPOST:	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
319; DOTPOST:	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
320; DOTPOST:	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
321; DOTPOST:	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
322; DOTPOST:	Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
323; DOTPOST:	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
324; DOTPOST:	Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
325; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
326; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
327; DOTPOST:}
328
329
330; DOTCLONED: digraph "cloned" {
331; DOTCLONED: 	label="cloned";
332; DOTCLONED: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
333; DOTCLONED: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
334; DOTCLONED: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
335; DOTCLONED: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
336; DOTCLONED: 	Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
337; DOTCLONED: 	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
338; DOTCLONED: 	Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
339; DOTCLONED: 	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
340; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
341; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
342; DOTCLONED: }
343