xref: /llvm-project/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll (revision 176889868024d98db032842bc47b416997d9e349)
1;; Test callsite context graph generation for call graph with MIBs
2;; that have pruned contexts that partially match multiple inlined
3;; callsite contexts, requiring duplication of context ids and nodes
4;; while matching callsite nodes onto the graph. This test requires more
5;; complex duplication due to multiple contexts for different allocations
6;; that share some of the same callsite nodes.
7;;
8;; Original code looks like:
9;;
10;; char *D(bool Call1) {
11;;   if (Call1)
12;;     return new char[10];
13;;   else
14;;     return new char[10];
15;; }
16;;
17;; char *C(bool Call1) {
18;;   return D(Call1);
19;; }
20;;
21;; char *B(bool Call1) {
22;;   if (Call1)
23;;     return C(true);
24;;   else
25;;     return C(false);
26;; }
27;;
28;; char *A(bool Call1) {
29;;   return B(Call1);
30;; }
31;;
32;; char *A1() {
33;;   return A(true);
34;; }
35;;
36;; char *A2() {
37;;   return A(true);
38;; }
39;;
40;; char *A3() {
41;;   return A(false);
42;; }
43;;
44;; char *A4() {
45;;   return A(false);
46;; }
47;;
48;; char *E() {
49;;   return B(true);
50;; }
51;;
52;; char *F() {
53;;   return B(false);
54;; }
55;;
56;; int main(int argc, char **argv) {
57;;   char *a1 = A1(); // cold
58;;   char *a2 = A2(); // cold
59;;   char *e = E(); // default
60;;   char *a3 = A3(); // default
61;;   char *a4 = A4(); // default
62;;   char *f = F(); // cold
63;;   memset(a1, 0, 10);
64;;   memset(a2, 0, 10);
65;;   memset(e, 0, 10);
66;;   memset(a3, 0, 10);
67;;   memset(a4, 0, 10);
68;;   memset(f, 0, 10);
69;;   delete[] a3;
70;;   delete[] a4;
71;;   delete[] e;
72;;   sleep(10);
73;;   delete[] a1;
74;;   delete[] a2;
75;;   delete[] f;
76;;   return 0;
77;; }
78;;
79;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
80;; memory freed after sleep(10) results in cold lifetimes.
81;;
82;; The code below was created by forcing inlining of A into its callers,
83;; without any other inlining or optimizations. Since both allocation contexts
84;; via A for each allocation in D have the same allocation type (cold via
85;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
86;; new in D), the contexts for those respective allocations are pruned above A.
87;; The allocations via E and F are to ensure we don't prune above B.
88;;
89;; The matching onto the inlined A[1234]->A sequences will require duplication
90;; of the context id assigned to the context from A for each allocation in D.
91;; This test ensures that we do this correctly in the presence of callsites
92;; shared by the different duplicated context ids (i.e. callsite in C).
93;;
94;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
95
;; Test driver: build ThinLTO bitcode from this file, then run llvm-lto2 with
;; memprof context disambiguation enabled. The callsite context graph is
;; verified and dumped (-memprof-verify-ccg, -memprof-verify-nodes,
;; -memprof-dump-ccg), and the combined stdout/stderr is piped to FileCheck
;; with the DUMP prefix. Dot files are also exported under the %t. prefix;
;; none of the checks visible in this file read them.
96; RUN: opt -thinlto-bc %s >%t.o
97; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
98; RUN:	-supports-hot-cold-new \
99; RUN:  -r=%t.o,main,plx \
100; RUN:  -r=%t.o,_Z1Db,plx \
101; RUN:  -r=%t.o,_Z1Cb,plx \
102; RUN:  -r=%t.o,_Z1Bb,plx \
103; RUN:  -r=%t.o,_Z1Ab,plx \
104; RUN:  -r=%t.o,_Z2A1v,plx \
105; RUN:  -r=%t.o,_Z2A2v,plx \
106; RUN:  -r=%t.o,_Z2A3v,plx \
107; RUN:  -r=%t.o,_Z2A4v,plx \
108; RUN:  -r=%t.o,_Z1Ev,plx \
109; RUN:  -r=%t.o,_Z1Fv,plx \
110; RUN:  -r=%t.o,_ZdaPv, \
111; RUN:  -r=%t.o,sleep, \
112; RUN:  -r=%t.o,_Znam, \
113; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
114; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
115; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP
116
117
118target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
119target triple = "x86_64-unknown-linux-gnu"
120
;; Reduced D(): holds the two _Znam (new char[]) calls. Each carries a !memprof
;; MIB list (!0 for the first, !6 for the second) and the !callsite id of the
;; allocation itself (!5, !11).
121define ptr @_Z1Db(i1 %Call1) {
122entry:
123  %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5
124  br label %return
125
;; This block has no predecessors after reduction, but the second allocation
;; call is kept so that its memprof metadata still adds contexts to the graph.
126if.else:                                          ; No predecessors!
127  %call1 = call ptr @_Znam(i64 0), !memprof !6, !callsite !11
128  br label %return
129
130return:                                           ; preds = %if.else, %entry
131  ret ptr null
132}
133
134declare ptr @_Znam(i64)
135
;; Reduced C(): its single call to D has stack id -904694911315397047 (!12),
;; which appears in the MIB contexts of both allocations in D (!2, !4, !8 and
;; !10). This is the callsite shared by the different duplicated context ids.
136define ptr @_Z1Cb(i1 %Call1) {
137entry:
138  %call = call ptr @_Z1Db(i1 false), !callsite !12
139  ret ptr null
140}
141
;; Reduced B(): two calls to C with distinct callsite ids. The id in !13 occurs
;; only in the contexts of the first allocation (!2, !4); the id in !14 occurs
;; only in the contexts of the second allocation (!8, !10).
142define ptr @_Z1Bb(i1 %Call1) {
143entry:
144  %call = call ptr @_Z1Cb(i1 false), !callsite !13
145  br label %return
146
;; No predecessors after reduction; kept so the second callsite id is present.
147if.else:                                          ; No predecessors!
148  %call1 = call ptr @_Z1Cb(i1 false), !callsite !14
149  br label %return
150
151return:                                           ; preds = %if.else, %entry
152  ret ptr null
153}
154
;; Out-of-line copy of A(): the call to B has the single stack id
;; -6528110295079665978 (!15), which is the last frame of the pruned MIB
;; contexts !4 (cold, first allocation) and !8 (notcold, second allocation).
155define ptr @_Z1Ab() {
156entry:
157  %call = call ptr @_Z1Bb(i1 false), !callsite !15
158  ret ptr null
159}
160
;; A1() with A inlined: the callsite stack (!16) is A's call to B
;; (-6528110295079665978) followed by an id that only this caller uses.
161define ptr @_Z2A1v() {
162entry:
163  %call.i = call ptr @_Z1Bb(i1 false), !callsite !16
164  ret ptr null
165}
166
;; A2() with A inlined: the callsite stack (!17) is A's call to B
;; (-6528110295079665978) followed by an id that only this caller uses.
167define ptr @_Z2A2v() {
168entry:
169  %call.i = call ptr @_Z1Bb(i1 false), !callsite !17
170  ret ptr null
171}
172
;; A3() with A inlined: the callsite stack (!18) is A's call to B
;; (-6528110295079665978) followed by an id that only this caller uses.
173define ptr @_Z2A3v() {
174entry:
175  %call.i = call ptr @_Z1Bb(i1 false), !callsite !18
176  ret ptr null
177}
178
;; A4() with A inlined: the callsite stack (!19) is A's call to B
;; (-6528110295079665978) followed by an id that only this caller uses.
179define ptr @_Z2A4v() {
180entry:
181  %call.i = call ptr @_Z1Bb(i1 false), !callsite !19
182  ret ptr null
183}
184
;; E() calls B directly; its callsite id 1905834578520680781 (!20) is the last
;; frame of the notcold context (!2) of the first allocation in D.
185define ptr @_Z1Ev() {
186entry:
187  %call = call ptr @_Z1Bb(i1 false), !callsite !20
188  ret ptr null
189}
190
;; F() calls B directly; its callsite id -4903163940066524832 (!21) is the last
;; frame of the cold context (!10) of the second allocation in D.
191define ptr @_Z1Fv() {
192entry:
193  %call = call ptr @_Z1Bb(i1 false), !callsite !21
194  ret ptr null
195}
196
197declare i32 @main()
198
199declare void @_ZdaPv()
200
201declare i32 @sleep()
202
203; uselistorder directives
204uselistorder ptr @_Znam, { 1, 0 }
205
;; MIB list for the first _Znam call in _Z1Db. Each context lists stack ids
;; from the allocation outward: the allocation in D, the call in C, the first
;; call in B, and then the caller of B (E's callsite for !2, A's for !4).
206!0 = !{!1, !3}
207!1 = !{!2, !"notcold"}
208!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781}
209!3 = !{!4, !"cold"}
210!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978}
;; Callsite id of the first allocation itself.
211!5 = !{i64 4854880825882961848}
;; MIB list for the second _Znam call in _Z1Db. Same layout, but going through
;; the second call in B, and ending at A's callsite (!8) or F's callsite (!10).
212!6 = !{!7, !9}
213!7 = !{!8, !"notcold"}
214!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978}
215!9 = !{!10, !"cold"}
216!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832}
;; Callsite id of the second allocation itself.
217!11 = !{i64 -8775068539491628272}
;; Callsite ids: !12 is the call to D in C, !13 and !14 are the two calls to C
;; in B, and !15 is the call to B in the out-of-line A.
218!12 = !{i64 -904694911315397047}
219!13 = !{i64 6532298921261778285}
220!14 = !{i64 7859682663773658275}
221!15 = !{i64 -6528110295079665978}
;; Inlined callsites in A1..A4: A's call to B followed by a per-caller id.
222!16 = !{i64 -6528110295079665978, i64 5747919905719679568}
223!17 = !{i64 -6528110295079665978, i64 -5753238080028016843}
224!18 = !{i64 -6528110295079665978, i64 1794685869326395337}
225!19 = !{i64 -6528110295079665978, i64 5462047985461644151}
;; Callsite ids of the calls to B in E (!20) and F (!21).
226!20 = !{i64 1905834578520680781}
227!21 = !{i64 -4903163940066524832}
228
229
230;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only
231;; match the interesting parts of the pre-update graph here).
232
233; DUMP: CCG before updating call stack chains:
234; DUMP: Callsite Context Graph:
235
236; DUMP: Node [[D1:0x[a-z0-9]+]]
237; DUMP: Versions: 1 MIB:
238; DUMP:                 AllocType 1 StackIds: 0, 1, 2
239; DUMP:                 AllocType 2 StackIds: 0, 1, 3
240; DUMP:         (clone 0)
241; DUMP: 	AllocTypes: NotColdCold
242; DUMP: 	ContextIds: 1 2
243
244; DUMP: Node [[C:0x[a-z0-9]+]]
245; DUMP:         null Call
246; DUMP:         AllocTypes: NotColdCold
247; DUMP:         ContextIds: 1 2 3 4
248; DUMP:         CalleeEdges:
249; DUMP:                 Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
250; DUMP:                 Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
251
252; DUMP: Node [[D2]]
253; DUMP: Versions: 1 MIB:
254; DUMP:                 AllocType 1 StackIds: 0, 4, 3
255; DUMP:                 AllocType 2 StackIds: 0, 4, 5
256; DUMP:         (clone 0)
257; DUMP: 	AllocTypes: NotColdCold
258; DUMP: 	ContextIds: 3 4
259
260
261;; After updating for callsite metadata, we should have duplicated the context
262;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
263;; and used those on new nodes for those callers. Note that while in reality
264;; we only have cold edges coming from A1 and A2 and non-cold from A3 and A4,
265;; due to the pruning we have lost this information and thus end up duplicating
266;; both of A's contexts to all of the new nodes (which could result in some
267;; unnecessary cloning).
268
269; DUMP: CCG before cloning:
270; DUMP: Callsite Context Graph:
271; DUMP: Node [[D1]]
272; DUMP: Versions: 1 MIB:
273; DUMP:                 AllocType 1 StackIds: 0, 1, 2
274; DUMP:                 AllocType 2 StackIds: 0, 1, 3
275; DUMP:         (clone 0)
276; DUMP: 	AllocTypes: NotColdCold
277; DUMP: 	ContextIds: 1 2 5 7 9 11
278; DUMP: 	CalleeEdges:
279; DUMP: 	CallerEdges:
280; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
281
282; DUMP: Node [[C]]
283; DUMP: 	Callee: 11485875876353461977 (_Z1Db) Clones: 0 StackIds: 0      (clone 0)
284; DUMP: 	AllocTypes: NotColdCold
285; DUMP: 	ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
286; DUMP: 	CalleeEdges:
287; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
288; DUMP: 		Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
289; DUMP: 	CallerEdges:
290; DUMP: 		Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
291; DUMP: 		Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
292
293; DUMP: Node [[B1]]
294; DUMP: 	Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 1      (clone 0)
295; DUMP: 	AllocTypes: NotColdCold
296; DUMP: 	ContextIds: 1 2 5 7 9 11
297; DUMP: 	CalleeEdges:
298; DUMP: 		Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
299; DUMP: 	CallerEdges:
300; DUMP: 		Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
301; DUMP: 		Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
302; DUMP: 		Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
303; DUMP: 		Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
304; DUMP: 		Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
305; DUMP: 		Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
306
307; DUMP: Node [[E]]
308; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 2       (clone 0)
309; DUMP: 	AllocTypes: NotCold
310; DUMP: 	ContextIds: 1
311; DUMP: 	CalleeEdges:
312; DUMP: 		Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
313; DUMP: 	CallerEdges:
314
315; DUMP: Node [[D2]]
316; DUMP: Versions: 1 MIB:
317; DUMP:                 AllocType 1 StackIds: 0, 4, 3
318; DUMP:                 AllocType 2 StackIds: 0, 4, 5
319; DUMP:         (clone 0)
320; DUMP: 	AllocTypes: NotColdCold
321; DUMP: 	ContextIds: 3 4 6 8 10 12
322; DUMP: 	CalleeEdges:
323; DUMP: 	CallerEdges:
324; DUMP: 		Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
325
326; DUMP: Node [[B2]]
327; DUMP: 	Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 4      (clone 0)
328; DUMP: 	AllocTypes: NotColdCold
329; DUMP: 	ContextIds: 3 4 6 8 10 12
330; DUMP: 	CalleeEdges:
331; DUMP: 		Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
332; DUMP: 	CallerEdges:
333; DUMP: 		Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
334; DUMP: 		Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
335; DUMP: 		Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
336; DUMP: 		Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
337; DUMP: 		Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
338; DUMP: 		Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
339
340; DUMP: Node [[F]]
341; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 5       (clone 0)
342; DUMP: 	AllocTypes: Cold
343; DUMP: 	ContextIds: 4
344; DUMP: 	CalleeEdges:
345; DUMP: 		Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4
346; DUMP: 	CallerEdges:
347
348; DUMP: Node [[A2]]
349; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 7	(clone 0)
350; DUMP: 	AllocTypes: NotColdCold
351; DUMP: 	ContextIds: 5 6
352; DUMP: 	CalleeEdges:
353; DUMP: 		Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5
354; DUMP: 		Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
355; DUMP: 	CallerEdges:
356
357; DUMP: Node [[A3]]
358; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 8    (clone 0)
359; DUMP: 	AllocTypes: NotColdCold
360; DUMP: 	ContextIds: 7 8
361; DUMP: 	CalleeEdges:
362; DUMP: 		Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7
363; DUMP: 		Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
364; DUMP: 	CallerEdges:
365
366; DUMP: Node [[A1]]
367; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3	(clone 0)
368; DUMP: 	AllocTypes: NotColdCold
369; DUMP: 	ContextIds: 9 10
370; DUMP: 	CalleeEdges:
371; DUMP: 		Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9
372; DUMP: 		Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
373; DUMP: 	CallerEdges:
374
375; DUMP: Node [[A4]]
376; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 9    (clone 0)
377; DUMP: 	AllocTypes: NotColdCold
378; DUMP: 	ContextIds: 11 12
379; DUMP: 	CalleeEdges:
380; DUMP: 		Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11
381; DUMP: 		Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
382; DUMP: 	CallerEdges:
383
384; DUMP: Node [[A]]
385; DUMP: 	Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 6    (clone 0)
386; DUMP: 	AllocTypes: NotColdCold
387; DUMP: 	ContextIds: 2 3
388; DUMP: 	CalleeEdges:
389; DUMP: 		Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2
390; DUMP: 		Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
391; DUMP: 	CallerEdges:
392