xref: /llvm-project/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll (revision 176889868024d98db032842bc47b416997d9e349)
1;; Test callsite context graph generation for call graph with MIBs
2;; that have pruned contexts that partially match multiple inlined
3;; callsite contexts, requiring duplication of context ids and nodes
4;; while matching callsite nodes onto the graph. This test requires more
5;; complex duplication due to multiple contexts for different allocations
6;; that share some of the same callsite nodes.
7;;
8;; Original code looks like:
9;;
10;; char *D(bool Call1) {
11;;   if (Call1)
12;;     return new char[10];
13;;   else
14;;     return new char[10];
15;; }
16;;
17;; char *C(bool Call1) {
18;;   return D(Call1);
19;; }
20;;
21;; char *B(bool Call1) {
22;;   if (Call1)
23;;     return C(true);
24;;   else
25;;     return C(false);
26;; }
27;;
28;; char *A(bool Call1) {
29;;   return B(Call1);
30;; }
31;;
32;; char *A1() {
33;;   return A(true);
34;; }
35;;
36;; char *A2() {
37;;   return A(true);
38;; }
39;;
40;; char *A3() {
41;;   return A(false);
42;; }
43;;
44;; char *A4() {
45;;   return A(false);
46;; }
47;;
48;; char *E() {
49;;   return B(true);
50;; }
51;;
52;; char *F() {
53;;   return B(false);
54;; }
55;;
56;; int main(int argc, char **argv) {
57;;   char *a1 = A1(); // cold
58;;   char *a2 = A2(); // cold
59;;   char *e = E(); // default
60;;   char *a3 = A3(); // default
61;;   char *a4 = A4(); // default
62;;   char *f = F(); // cold
63;;   memset(a1, 0, 10);
64;;   memset(a2, 0, 10);
65;;   memset(e, 0, 10);
66;;   memset(a3, 0, 10);
67;;   memset(a4, 0, 10);
68;;   memset(f, 0, 10);
69;;   delete[] a3;
70;;   delete[] a4;
71;;   delete[] e;
72;;   sleep(10);
73;;   delete[] a1;
74;;   delete[] a2;
75;;   delete[] f;
76;;   return 0;
77;; }
78;;
79;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
80;; memory freed after sleep(10) results in cold lifetimes.
81;;
82;; The code below was created by forcing inlining of A into its callers,
83;; without any other inlining or optimizations. Since both allocation contexts
84;; via A for each allocation in D have the same allocation type (cold via
85;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
86;; new in D), the contexts for those respective allocations are pruned above A.
87;; The allocations via E and F are to ensure we don't prune above B.
88;;
89;; The matching onto the inlined A[1234]->A sequences will require duplication
90;; of the context id assigned to the context from A for each allocation in D.
91;; This test ensures that we do this correctly in the presence of callsites
92;; shared by the different duplicated context ids (i.e. callsite in C).
93;;
94;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
95
96; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
97; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
98; RUN:  -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
99; RUN:  %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
100
101
102target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
103target triple = "x86_64-unknown-linux-gnu"
104
;; Reduced D(bool): holds the two _Znam (operator new[]) allocation call sites
;; that carry the profile metadata. The first (entry) uses !memprof !0 /
;; !callsite !5; the second (if.else) uses !memprof !6 / !callsite !11.
;; if.else has no predecessors after reduction, but its call is still what the
;; second allocation node in the DUMP checks is matched against.
105; Function Attrs: mustprogress noinline uwtable
106define ptr @_Z1Db(i1 %Call1) #0 {
107entry:
108  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
109  br label %return
110
111if.else:                                          ; No predecessors!
112  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11
113  br label %return
114
115return:                                           ; preds = %if.else, %entry
116  ret ptr null
117}
118
119; Function Attrs: nobuiltin
120declare ptr @_Znam(i64) #1
121
;; Reduced C(bool): a single call to D. Its !callsite !12 id
;; (-904694911315397047) is the second frame of all four MIB stacks
;; (!2, !4, !8, !10), so this callsite is shared by the contexts of both
;; allocations in D.
122define ptr @_Z1Cb(i1 %Call1) {
123entry:
124  %tobool = trunc i8 0 to i1
125  %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12
126  ret ptr null
127}
128
;; Reduced B(bool): two calls to C. The entry call (!callsite !13) appears only
;; in the MIB stacks of the first allocation (!2, !4); the if.else call
;; (!callsite !14) appears only in those of the second allocation (!8, !10).
;; if.else has no predecessors after reduction.
129; Function Attrs: mustprogress noinline uwtable
130define ptr @_Z1Bb(i1 %Call1) #0 {
131entry:
132  %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13
133  br label %return
134
135if.else:                                          ; No predecessors!
136  %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14
137  br label %return
138
139return:                                           ; preds = %if.else, %entry
140  ret ptr null
141}
142
;; Out-of-line copy of A: a single call to B. Its !callsite !15 id
;; (-6528110295079665978) is the last frame of MIB stacks !4 (cold) and
;; !8 (notcold), i.e. the point above which those contexts were pruned.
143define ptr @_Z1Ab(i1 %tobool) #2 {
144entry:
145  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15
146  ret ptr null
147}
148
;; A1 after A was inlined into it: the call to B carries a two-frame
;; !callsite !16 whose first frame is A's callsite id (same value as !15) and
;; whose second frame (5747919905719679568) is not present in any MIB stack.
149; Function Attrs: mustprogress noinline uwtable
150define ptr @_Z2A1v(i1 %tobool.i) #0 {
151entry:
152  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16
153  ret ptr null
154}
155
;; A2 after A was inlined into it: the call to B carries a two-frame
;; !callsite !17 whose first frame is A's callsite id (same value as !15) and
;; whose second frame (-5753238080028016843) is not present in any MIB stack.
156; Function Attrs: mustprogress noinline uwtable
157define ptr @_Z2A2v(i1 %tobool.i) #0 {
158entry:
159  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17
160  ret ptr null
161}
162
;; A3 after A was inlined into it: the call to B carries a two-frame
;; !callsite !18 whose first frame is A's callsite id (same value as !15) and
;; whose second frame (1794685869326395337) is not present in any MIB stack.
163; Function Attrs: mustprogress noinline uwtable
164define ptr @_Z2A3v(i1 %tobool.i) #0 {
165entry:
166  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18
167  ret ptr null
168}
169
;; A4 after A was inlined into it: the call to B carries a two-frame
;; !callsite !19 whose first frame is A's callsite id (same value as !15) and
;; whose second frame (5462047985461644151) is not present in any MIB stack.
170; Function Attrs: mustprogress noinline uwtable
171define ptr @_Z2A4v(i1 %tobool.i) #0 {
172entry:
173  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19
174  ret ptr null
175}
176
;; E: calls B(true) directly. Its !callsite !20 id (1905834578520680781) is the
;; last frame of the notcold MIB stack !2 of the first allocation.
177; Function Attrs: mustprogress noinline uwtable
178define ptr @_Z1Ev() #0 {
179entry:
180  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20
181  ret ptr null
182}
183
;; F: calls B(false) directly. Its !callsite !21 id (-4903163940066524832) is
;; the last frame of the cold MIB stack !10 of the second allocation.
184; Function Attrs: mustprogress noinline uwtable
185define ptr @_Z1Fv() #0 {
186entry:
187  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21
188  ret ptr null
189}
190
191; Function Attrs: noinline
192declare i32 @main() #3
193
194; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
195declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
196
197declare void @_ZdaPv() #5
198
199declare i32 @sleep() #6
200
201; uselistorder directives
202uselistorder ptr @_Znam, { 1, 0 }
203
204attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
205attributes #1 = { nobuiltin }
206attributes #2 = { "tune-cpu"="generic" }
207attributes #3 = { noinline }
208attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
209attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
210attributes #6 = { "disable-tail-calls"="true" }
211attributes #7 = { builtin allocsize(0) }
212
213!0 = !{!1, !3}
214!1 = !{!2, !"notcold"}
215!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781}
216!3 = !{!4, !"cold"}
217!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978}
218!5 = !{i64 4854880825882961848}
219!6 = !{!7, !9}
220!7 = !{!8, !"notcold"}
221!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978}
222!9 = !{!10, !"cold"}
223!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832}
224!11 = !{i64 -8775068539491628272}
225!12 = !{i64 -904694911315397047}
226!13 = !{i64 6532298921261778285}
227!14 = !{i64 7859682663773658275}
228!15 = !{i64 -6528110295079665978}
229!16 = !{i64 -6528110295079665978, i64 5747919905719679568}
230!17 = !{i64 -6528110295079665978, i64 -5753238080028016843}
231!18 = !{i64 -6528110295079665978, i64 1794685869326395337}
232!19 = !{i64 -6528110295079665978, i64 5462047985461644151}
233!20 = !{i64 1905834578520680781}
234!21 = !{i64 -4903163940066524832}
235
236
237;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only
238;; match the interesting parts of the pre-update graph here).
239
240; DUMP: CCG before updating call stack chains:
241; DUMP: Callsite Context Graph:
242
243; DUMP: Node [[D1:0x[a-z0-9]+]]
244; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
245; DUMP: 	AllocTypes: NotColdCold
246; DUMP: 	ContextIds: 1 2
247
248; DUMP: Node [[C:0x[a-z0-9]+]]
249; DUMP:         null Call
250; DUMP:         AllocTypes: NotColdCold
251; DUMP:         ContextIds: 1 2 3 4
252; DUMP:         CalleeEdges:
253; DUMP:                 Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
254; DUMP:                 Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
255
256; DUMP: Node [[D2]]
257; DUMP: 	  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
258; DUMP: 	AllocTypes: NotColdCold
259; DUMP: 	ContextIds: 3 4
260
261
262;; After updating for callsite metadata, we should have duplicated the context
263;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
264;; and used those on new nodes for those callers. Note that while in reality
265;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
266;; due to the pruning we have lost this information and thus end up duplicating
267;; both of A's contexts to all of the new nodes (which could result in some
268;; unnecessary cloning).
269
270; DUMP: CCG before cloning:
271; DUMP: Callsite Context Graph:
272; DUMP: Node [[D1]]
273; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
274; DUMP: 	AllocTypes: NotColdCold
275; DUMP: 	ContextIds: 1 2 5 7 9 11
276; DUMP: 	CalleeEdges:
277; DUMP: 	CallerEdges:
278; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
279
280; DUMP: Node [[C]]
281; DUMP: 	  %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool)	(clone 0)
282; DUMP: 	AllocTypes: NotColdCold
283; DUMP: 	ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
284; DUMP: 	CalleeEdges:
285; DUMP: 		Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
286; DUMP: 		Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
287; DUMP: 	CallerEdges:
288; DUMP: 		Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
289; DUMP: 		Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
290
291; DUMP: Node [[B1]]
292; DUMP: 	  %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true)	(clone 0)
293; DUMP: 	AllocTypes: NotColdCold
294; DUMP: 	ContextIds: 1 2 5 7 9 11
295; DUMP: 	CalleeEdges:
296; DUMP: 		Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
297; DUMP: 	CallerEdges:
298; DUMP: 		Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
299; DUMP: 		Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
300; DUMP: 		Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
301; DUMP: 		Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
302; DUMP: 		Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
303; DUMP: 		Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
304
305; DUMP: Node [[E]]
306; DUMP: 	  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true)	(clone 0)
307; DUMP: 	AllocTypes: NotCold
308; DUMP: 	ContextIds: 1
309; DUMP: 	CalleeEdges:
310; DUMP: 		Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
311; DUMP: 	CallerEdges:
312
313; DUMP: Node [[D2]]
314; DUMP: 	  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
315; DUMP: 	AllocTypes: NotColdCold
316; DUMP: 	ContextIds: 3 4 6 8 10 12
317; DUMP: 	CalleeEdges:
318; DUMP: 	CallerEdges:
319; DUMP: 		Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
320
321; DUMP: Node [[B2]]
322; DUMP: 	  %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false)	(clone 0)
323; DUMP: 	AllocTypes: NotColdCold
324; DUMP: 	ContextIds: 3 4 6 8 10 12
325; DUMP: 	CalleeEdges:
326; DUMP: 		Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
327; DUMP: 	CallerEdges:
328; DUMP: 		Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
329; DUMP: 		Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
330; DUMP: 		Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
331; DUMP: 		Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
332; DUMP: 		Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
333; DUMP: 		Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
334
335; DUMP: Node [[F]]
336; DUMP: 	  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false)	(clone 0)
337; DUMP: 	AllocTypes: Cold
338; DUMP: 	ContextIds: 4
339; DUMP: 	CalleeEdges:
340; DUMP: 		Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4
341; DUMP: 	CallerEdges:
342
343; DUMP: Node [[A2]]
344; DUMP: 	  %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool)	(clone 0)
345; DUMP: 	AllocTypes: NotColdCold
346; DUMP: 	ContextIds: 5 6
347; DUMP: 	CalleeEdges:
348; DUMP: 		Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5
349; DUMP: 		Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
350; DUMP: 	CallerEdges:
351
352; DUMP: Node [[A3]]
353; DUMP: 	  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i)	(clone 0)
354; DUMP: 	AllocTypes: NotColdCold
355; DUMP: 	ContextIds: 7 8
356; DUMP: 	CalleeEdges:
357; DUMP: 		Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7
358; DUMP: 		Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
359; DUMP: 	CallerEdges:
360
361; DUMP: Node [[A1]]
362; DUMP: 	  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i)	(clone 0)
363; DUMP: 	AllocTypes: NotColdCold
364; DUMP: 	ContextIds: 9 10
365; DUMP: 	CalleeEdges:
366; DUMP: 		Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9
367; DUMP: 		Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
368; DUMP: 	CallerEdges:
369
370; DUMP: Node [[A4]]
371; DUMP: 	  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i)	(clone 0)
372; DUMP: 	AllocTypes: NotColdCold
373; DUMP: 	ContextIds: 11 12
374; DUMP: 	CalleeEdges:
375; DUMP: 		Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11
376; DUMP: 		Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
377; DUMP: 	CallerEdges:
378
379; DUMP: Node [[A]]
380; DUMP: 	  %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i)	(clone 0)
381; DUMP: 	AllocTypes: NotColdCold
382; DUMP: 	ContextIds: 2 3
383; DUMP: 	CalleeEdges:
384; DUMP: 		Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2
385; DUMP: 		Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
386; DUMP: 	CallerEdges:
387