xref: /llvm-project/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll (revision e3e6bc699574550f2ed1de07f4e5bcdddaa65557)
1;; Test callsite context graph generation for call graph with MIBs
2;; that have pruned contexts that partially match multiple inlined
3;; callsite contexts, requiring duplication of context ids and nodes
4;; while matching callsite nodes onto the graph. Also tests graph and IR
5;; cloning.
6;;
7;; Original code looks like:
8;;
9;; char *D() {
10;;   return new char[10];
11;; }
12;;
13;; char *F() {
14;;   return D();
15;; }
16;;
17;; char *C() {
18;;   return D();
19;; }
20;;
21;; char *B() {
22;;   return C();
23;; }
24;;
25;; char *E() {
26;;   return C();
27;; }
28;; int main(int argc, char **argv) {
29;;   char *x = B(); // cold
30;;   char *y = E(); // cold
31;;   char *z = F(); // default
32;;   memset(x, 0, 10);
33;;   memset(y, 0, 10);
34;;   memset(z, 0, 10);
35;;   delete[] z;
36;;   sleep(10);
37;;   delete[] x;
38;;   delete[] y;
39;;   return 0;
40;; }
41;;
42;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
43;; memory freed after sleep(10) results in cold lifetimes.
44;;
45;; The code below was created by forcing inlining of C into both B and E.
46;; Since both allocation contexts via C are cold, the matched memprof
47;; metadata has the context pruned above C's callsite. This requires
48;; matching the stack node for C to callsites where it was inlined (i.e.
49;; the callsites in B and E that have callsite metadata that includes C's).
50;; It also requires duplication of that node in the graph as well as the
51;; duplication of the context ids along that path through the graph,
52;; so that we can represent the duplicated (via inlining) C callsite.
53;;
54;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
55
56;; -stats requires asserts
57; REQUIRES: asserts
58
59; RUN: opt -thinlto-bc %s >%t.o
60; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
61; RUN:  -r=%t.o,main,plx \
62; RUN:  -r=%t.o,_ZdaPv, \
63; RUN:  -r=%t.o,sleep, \
64; RUN:  -r=%t.o,_Znam, \
65; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
66; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
67; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
68; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
69; RUN:  --check-prefix=STATS
70
71; RUN:  cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
72; RUN:  cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
73;; We should clone D once for the cold allocations via C.
74; RUN:  cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
75
76
77;; Try again but with distributed ThinLTO
78; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
79; RUN:  -thinlto-distributed-indexes \
80; RUN:  -r=%t.o,main,plx \
81; RUN:  -r=%t.o,_ZdaPv, \
82; RUN:  -r=%t.o,sleep, \
83; RUN:  -r=%t.o,_Znam, \
84; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
85; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t2. \
86; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
87; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
88; RUN:  --check-prefix=STATS
89
;; The distributed run above writes its dot files under the %t2. prefix, so
;; check those files rather than the ones left over from the first run.
90; RUN:  cat %t2.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
91; RUN:  cat %t2.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
92;; We should clone D once for the cold allocations via C.
93; RUN:  cat %t2.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
94
95;; Check distributed index
96; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB
97
98source_filename = "duplicate-context-ids.ll"
99target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
100target triple = "x86_64-unknown-linux-gnu"
101
;; D: contains the profiled allocation. The call to _Znam carries the !memprof
;; profile !0 (one "cold" and one "notcold" context) and !callsite !5, whose
;; stack id is the first id in both profiled context stacks (!2 and !4).
102define internal ptr @_Z1Dv() {
103entry:
104  %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
105  ret ptr null
106}
107
108declare ptr @_Znam(i64)
109
;; F: calls D with !callsite !6, whose stack id is the second id of the
;; "notcold" context stack (!4).
110define internal ptr @_Z1Fv() {
111entry:
112  %call = call ptr @_Z1Dv(), !callsite !6
113  ret ptr null
114}
115
;; C itself: calls D with !callsite !7, whose stack id is the second id of the
;; "cold" context stack (!2).
116define internal ptr @_Z1Cv() {
117entry:
118  %call = call ptr @_Z1Dv(), !callsite !7
119  ret ptr null
120}
121
;; B with C inlined into it (see the description at the top of the file): the
;; call to D has !callsite !8, which lists C's stack id (the one in !7)
;; followed by one additional stack id.
122define internal ptr @_Z1Bv() {
123entry:
124  %call.i = call ptr @_Z1Dv(), !callsite !8
125  ret ptr null
126}
127
;; E with C inlined into it (see the description at the top of the file): the
;; call to D has !callsite !9, which lists C's stack id (the one in !7)
;; followed by one additional stack id that differs from the one used in B.
128define internal ptr @_Z1Ev() {
129entry:
130  %call.i = call ptr @_Z1Dv(), !callsite !9
131  ret ptr null
132}
133
;; main: calls B, E and F in that order. None of these three calls carries
;; !callsite metadata.
134define i32 @main() {
135entry:
136  call ptr @_Z1Bv()
137  call ptr @_Z1Ev()
138  call ptr @_Z1Fv()
139  ret i32 0
140}
141
142declare void @_ZdaPv()
143
144declare i32 @sleep()
145
;; !0 is the memprof profile attached to the allocation in D. It holds two
;; MIBs: !1 ("cold", stack !2) and !3 ("notcold", stack !4). Both stacks begin
;; with the stack id in !5 (the allocation callsite) and differ in their second
;; id, which equals !7 (the call in C) for the cold stack and !6 (the call in F)
;; for the notcold stack. !8 and !9 (the calls in B and E) begin with the same
;; id as !7 and each append one further, distinct stack id.
146!0 = !{!1, !3}
147!1 = !{!2, !"cold"}
148!2 = !{i64 6541423618768552252, i64 -6270142974039008131}
149!3 = !{!4, !"notcold"}
150!4 = !{i64 6541423618768552252, i64 -4903163940066524832}
151!5 = !{i64 6541423618768552252}
152!6 = !{i64 -4903163940066524832}
153!7 = !{i64 -6270142974039008131}
154!8 = !{i64 -6270142974039008131, i64 -184525619819294889}
155!9 = !{i64 -6270142974039008131, i64 1905834578520680781}
156
157
158;; After adding only the alloc node memprof metadata, we only have 2 contexts.
159
160; DUMP: CCG before updating call stack chains:
161; DUMP: Callsite Context Graph:
162; DUMP: Node [[D:0x[a-z0-9]+]]
163; DUMP: 	Versions: 1 MIB:
164; DUMP: 		AllocType 2 StackIds: 0
165; DUMP: 		AllocType 1 StackIds: 1
166; DUMP: 	(clone 0)
167; DUMP: 	AllocTypes: NotColdCold
168; DUMP: 	ContextIds: 1 2
169; DUMP: 	CalleeEdges:
170; DUMP: 	CallerEdges:
171; DUMP: 		Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
172; DUMP: 		Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
173
174; DUMP: Node [[C]]
175; DUMP: 	null Call
176; DUMP: 	AllocTypes: Cold
177; DUMP: 	ContextIds: 1
178; DUMP: 	CalleeEdges:
179; DUMP: 		Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
180; DUMP: 	CallerEdges:
181
182; DUMP: Node [[F]]
183; DUMP: 	null Call
184; DUMP: 	AllocTypes: NotCold
185; DUMP: 	ContextIds: 2
186; DUMP: 	CalleeEdges:
187; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
188; DUMP: 	CallerEdges:
189
190;; After updating for callsite metadata, we should have generated context ids 3 and 4,
191;; along with 2 new nodes for those callsites. All have the same allocation type
192;; behavior as the original C node.
193
194; DUMP: CCG before cloning:
195; DUMP: Callsite Context Graph:
196; DUMP: Node [[D]]
197; DUMP: 	Versions: 1 MIB:
198; DUMP: 		AllocType 2 StackIds: 0
199; DUMP: 		AllocType 1 StackIds: 1
200; DUMP: 	(clone 0)
201; DUMP: 	AllocTypes: NotColdCold
202; DUMP: 	ContextIds: 1 2 3 4
203; DUMP: 	CalleeEdges:
204; DUMP: 	CallerEdges:
205; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
206; DUMP: 		Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
207; DUMP: 		Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
208; DUMP: 		Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
209
210; DUMP: Node [[F]]
211; DUMP: 	Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1	(clone 0)
212; DUMP: 	AllocTypes: NotCold
213; DUMP: 	ContextIds: 2
214; DUMP: 	CalleeEdges:
215; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
216; DUMP: 	CallerEdges:
217
218; DUMP: Node [[C2]]
219; DUMP: 	Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0	(clone 0)
220; DUMP: 	AllocTypes: Cold
221; DUMP: 	ContextIds: 3
222; DUMP: 	CalleeEdges:
223; DUMP: 		Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
224; DUMP: 	CallerEdges:
225
226; DUMP: Node [[B]]
227; DUMP: 	Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2	(clone 0)
228; DUMP: 	AllocTypes: Cold
229; DUMP: 	ContextIds: 4
230; DUMP: 	CalleeEdges:
231; DUMP: 		Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
232; DUMP: 	CallerEdges:
233
234; DUMP: Node [[E]]
235; DUMP: 	Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3	(clone 0)
236; DUMP: 	AllocTypes: Cold
237; DUMP: 	ContextIds: 1
238; DUMP: 	CalleeEdges:
239; DUMP: 		Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
240; DUMP: 	CallerEdges:
241
242
243; DUMP: CCG after cloning:
244; DUMP: Callsite Context Graph:
245; DUMP: Node [[D]]
246; DUMP:         Versions: 1 MIB:
247; DUMP:                 AllocType 2 StackIds: 0
248; DUMP:                 AllocType 1 StackIds: 1
249; DUMP:         (clone 0)
250; DUMP: 	AllocTypes: NotCold
251; DUMP: 	ContextIds: 2
252; DUMP: 	CalleeEdges:
253; DUMP: 	CallerEdges:
254; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
255; DUMP:         Clones: [[D2:0x[a-z0-9]+]]
256
257; DUMP: Node [[F]]
258; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1       (clone 0)
259; DUMP: 	AllocTypes: NotCold
260; DUMP: 	ContextIds: 2
261; DUMP: 	CalleeEdges:
262; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
263; DUMP: 	CallerEdges:
264
265; DUMP: Node [[C2]]
266; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0       (clone 0)
267; DUMP: 	AllocTypes: Cold
268; DUMP: 	ContextIds: 3
269; DUMP: 	CalleeEdges:
270; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
271; DUMP: 	CallerEdges:
272
273; DUMP: Node [[B]]
274; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2    (clone 0)
275; DUMP: 	AllocTypes: Cold
276; DUMP: 	ContextIds: 4
277; DUMP: 	CalleeEdges:
278; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
279; DUMP: 	CallerEdges:
280
281; DUMP: Node [[E]]
282; DUMP:         Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3    (clone 0)
283; DUMP: 	AllocTypes: Cold
284; DUMP: 	ContextIds: 1
285; DUMP: 	CalleeEdges:
286; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
287; DUMP: 	CallerEdges:
288
289; DUMP: Node [[D2]]
290; DUMP:         Versions: 1 MIB:
291; DUMP:                 AllocType 2 StackIds: 0
292; DUMP:                 AllocType 1 StackIds: 1
293; DUMP:         (clone 0)
294; DUMP: 	AllocTypes: Cold
295; DUMP: 	ContextIds: 1 3 4
296; DUMP: 	CalleeEdges:
297; DUMP: 	CallerEdges:
;; Reuse the node variables captured earlier so that these edges are verified
;; to reach the same caller nodes, rather than matching any address.
298; DUMP: 		Edge from Callee [[D2]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
299; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
300; DUMP: 		Edge from Callee [[D2]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
301; DUMP:         Clone of [[D]]
302
303
304; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
305; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
306; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
307
308
309; DOTPRE: digraph "prestackupdate" {
310; DOTPRE: 	label="prestackupdate";
311; DOTPRE: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
312; DOTPRE: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
313; DOTPRE: 	Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
314; DOTPRE: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
315; DOTPRE: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
316; DOTPRE: }
317
318
319; DOTPOST:digraph "postbuild" {
320; DOTPOST:	label="postbuild";
321; DOTPOST:	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
322; DOTPOST:	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
323; DOTPOST:	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
324; DOTPOST:	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
325; DOTPOST:	Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
326; DOTPOST:	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
327; DOTPOST:	Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
328; DOTPOST:	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
329; DOTPOST:	Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
330; DOTPOST:}
331
332
333; DOTCLONED: digraph "cloned" {
334; DOTCLONED: 	label="cloned";
335; DOTCLONED: 	Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
336; DOTCLONED: 	Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
337; DOTCLONED: 	Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
338; DOTCLONED: 	Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
339; DOTCLONED: 	Node[[C]] -> Node[[D2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="cyan"];
340; DOTCLONED: 	Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
341; DOTCLONED: 	Node[[B]] -> Node[[D2]][tooltip="ContextIds: 4",fillcolor="cyan"];
342; DOTCLONED: 	Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
343; DOTCLONED: 	Node[[E]] -> Node[[D2]][tooltip="ContextIds: 1",fillcolor="cyan"];
344; DOTCLONED: 	Node[[D2]] [shape=record,tooltip="N[[D2]] ContextIds: 1 3 4",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"];
345; DOTCLONED: }
346
347; DISTRIB: ^[[C:[0-9]+]] = gv: (guid: 1643923691937891493, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1)
348; DISTRIB: ^[[D]] = gv: (guid: 4881081444663423788, {{.*}} allocs: ((versions: (notcold, cold)
349; DISTRIB: ^[[B:[0-9]+]] = gv: (guid: 14590037969532473829, {{.*}} callsites: ((callee: ^[[D]], clones: (1)
350; DISTRIB: ^[[F:[0-9]+]] = gv: (guid: 17035303613541779335, {{.*}} callsites: ((callee: ^[[D]], clones: (0)
351; DISTRIB: ^[[E:[0-9]+]] = gv: (guid: 17820708772846654376, {{.*}} callsites: ((callee: ^[[D]], clones: (1)
352