xref: /llvm-project/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll (revision 176889868024d98db032842bc47b416997d9e349)
1;; Test callsite context graph generation for call graph with two memprof
2;; contexts and partial inlining, requiring generation of a new fused node to
3;; represent the inlined sequence while matching callsite nodes onto the graph.
4;; Also tests graph and IR cloning.
5;;
6;; Original code looks like:
7;;
8;; char *bar() {
9;;   return new char[10];
10;; }
11;;
12;; char *baz() {
13;;   return bar();
14;; }
15;;
16;; char *foo() {
17;;   return baz();
18;; }
19;;
20;; int main(int argc, char **argv) {
21;;   char *x = foo();
22;;   char *y = foo();
23;;   memset(x, 0, 10);
24;;   memset(y, 0, 10);
25;;   delete[] x;
26;;   sleep(10);
27;;   delete[] y;
28;;   return 0;
29;; }
30;;
31;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
32;; memory freed after sleep(10) results in cold lifetimes.
33;;
34;; The code below was created by forcing inlining of baz into foo, and
35;; bar into baz. Due to the inlining of bar we will initially have two
36;; allocation nodes in the graph. This tests that we correctly match
37;; foo (with baz inlined) onto the graph nodes first, and generate a new
38;; fused node for it. We should then not match baz (with bar inlined) as that
39;; is not reached by the MIB contexts (since all calls from main will look
40;; like main -> foo(+baz) -> bar after the inlining reflected in this IR).
41;;
42;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
43
44;; -stats requires asserts
45; REQUIRES: asserts
46
47; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
48; RUN:	-memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
49; RUN:	-memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
50; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
51; RUN:	%s -S 2>&1 | FileCheck %s --check-prefix=DUMP --check-prefix=IR \
52; RUN:  --check-prefix=STATS --check-prefix=REMARKS
53
54; RUN:	cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
55;; We should create clones of foo and bar for the call from main that
56;; allocates cold memory.
57; RUN:	cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
58
59
;; Module target configuration: 64-bit x86 Linux, per the triple below.
60target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
61target triple = "x86_64-unknown-linux-gnu"
62
;; bar(): the out-of-line copy of the profiled allocation. The @_Znam call
;; carries the two-context profile (!memprof !0) and a !callsite (!5) holding
;; only the first stack id of those contexts. The checks at the end of this
;; file expect this allocation to stay notcold here and to be cloned into
;; _Z3barv.memprof.1 for the cold context.
;; The body returns null rather than the allocation (presumably an llvm-reduce
;; artifact; the header notes the file was reduced).
63define internal ptr @_Z3barv() {
64entry:
65  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
66  ret ptr null
67}
68
69; Function Attrs: nobuiltin
70declare ptr @_Znam(i64) #0
71
;; baz() after bar was inlined into it (see the header): it holds a second copy
;; of the profiled allocation, with the same !memprof !0 and a !callsite (!6)
;; made of the first two stack ids. No function in this module calls baz. The
;; expected remarks mark this allocation notcold and list no clone of baz.
72; Function Attrs: mustprogress
73define internal ptr @_Z3bazv() #1 {
74entry:
75  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6
76  ret ptr null
77}
78
;; foo() after baz was inlined into it (see the header): the remaining call to
;; bar carries !callsite !7, which holds the second and third stack ids of the
;; profiled contexts. The checks expect the graph node for this call to be
;; cloned, producing _Z3foov.memprof.1 whose call targets _Z3barv.memprof.1.
79; Function Attrs: noinline
80define internal ptr @_Z3foov() #2 {
81entry:
82  %call.i = call noundef ptr @_Z3barv(), !callsite !7
83  ret ptr null
84}
85
;; main(): two calls to foo, distinguished only by their !callsite metadata.
;; %call uses !8, the final stack id of the notcold context (!2); %call1 uses
;; !9, the final stack id of the cold context (!4). The expected output leaves
;; the first call on _Z3foov and redirects the second to _Z3foov.memprof.1.
;; The memset/delete[]/sleep calls from the original source are not present in
;; this body.
86define i32 @main() #3 {
87entry:
88  %call = call noundef ptr @_Z3foov(), !callsite !8
89  %call1 = call noundef ptr @_Z3foov(), !callsite !9
90  ret i32 0
91}
92
;; The three declarations below are not called by any function defined in this
;; file. They correspond to memset, delete[] and sleep in the original source
;; shown in the header (presumably left behind, with trimmed signatures, by
;; llvm-reduce).
93; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
94declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
95
96; Function Attrs: nounwind
97declare void @_ZdaPv() #5
98
99declare i32 @sleep() #6
100
;; Attribute groups and where this file uses them:
;;   #0      declaration of @_Znam
;;   #1      @_Z3bazv
;;   #2      @_Z3foov (noinline)
;;   #3      @main
;;   #4-#6   declarations of llvm.memset, @_ZdaPv and @sleep
;;   #7      both @_Znam call sites; the expected output attaches groups that
;;           combine builtin with "memprof"="notcold" or "memprof"="cold"
101attributes #0 = { nobuiltin }
102attributes #1 = { mustprogress }
103attributes #2 = { noinline }
104attributes #3 = { "tune-cpu"="generic" }
105attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
106attributes #5 = { nounwind }
107attributes #6 = { "disable-tail-calls"="true" }
108attributes #7 = { builtin }
109
;; Profile metadata. !0 is the !memprof list attached to both @_Znam calls. It
;; holds two MIBs: !1 (stack !2, "notcold") and !3 (stack !4, "cold"). The two
;; stacks share their first three ids and differ only in the last one.
;; !5-!9 are the !callsite nodes; each is a contiguous slice of those stacks:
;;   !5   first id             (allocation in bar)
;;   !6   first and second id  (allocation in baz)
;;   !7   second and third id  (call to bar in foo)
;;   !8   last id of !2        (first call to foo in main)
;;   !9   last id of !4        (second call to foo in main)
110!0 = !{!1, !3}
111!1 = !{!2, !"notcold"}
112!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
113!3 = !{!4, !"cold"}
114!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
115!5 = !{i64 9086428284934609951}
116!6 = !{i64 9086428284934609951, i64 -5964873800580613432}
117!7 = !{i64 -5964873800580613432, i64 2732490490862098848}
118!8 = !{i64 8632435727821051414}
119!9 = !{i64 -3421689549917153178}
120
121
122; DUMP: CCG before cloning:
123; DUMP: Callsite Context Graph:
124; DUMP: Node [[BAR:0x[a-z0-9]+]]
125; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
126; DUMP: 	AllocTypes: NotColdCold
127; DUMP: 	ContextIds: 1 2
128; DUMP: 	CalleeEdges:
129; DUMP: 	CallerEdges:
130; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
131
132;; This is leftover from the MIB on the alloc inlined into baz. It is not
133;; matched with any call, since there is no such node in the IR. Due to the
134;; null call it will not participate in any context transformations.
135; DUMP: Node [[FOO2:0x[a-z0-9]+]]
136; DUMP: 	null Call
137; DUMP: 	AllocTypes: NotColdCold
138; DUMP: 	ContextIds: 3 4
139; DUMP: 	CalleeEdges:
140; DUMP: 		Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
141; DUMP: 	CallerEdges:
142; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
143; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
144
145; DUMP: Node [[MAIN1]]
146; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
147; DUMP: 	AllocTypes: NotCold
148; DUMP: 	ContextIds: 1 3
149; DUMP: 	CalleeEdges:
150; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
151; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
152; DUMP: 	CallerEdges:
153
154; DUMP: Node [[MAIN2]]
155; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
156; DUMP: 	AllocTypes: Cold
157; DUMP: 	ContextIds: 2 4
158; DUMP: 	CalleeEdges:
159; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
160; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
161; DUMP: 	CallerEdges:
162
163; DUMP: Node [[BAZ]]
164; DUMP: 	  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
165; DUMP: 	AllocTypes: NotColdCold
166; DUMP: 	ContextIds: 3 4
167; DUMP: 	CalleeEdges:
168; DUMP: 	CallerEdges:
169; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
170
171;; This is the node synthesized for the call to bar in foo that was created
172;; by inlining baz into foo.
173; DUMP: Node [[FOO]]
174; DUMP: 	  %call.i = call noundef ptr @_Z3barv()	(clone 0)
175; DUMP: 	AllocTypes: NotColdCold
176; DUMP: 	ContextIds: 1 2
177; DUMP: 	CalleeEdges:
178; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
179; DUMP: 	CallerEdges:
180; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
181; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
182
183; DUMP: CCG after cloning:
184; DUMP: Callsite Context Graph:
185; DUMP: Node [[BAR]]
186; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
187; DUMP: 	AllocTypes: NotCold
188; DUMP: 	ContextIds: 1
189; DUMP: 	CalleeEdges:
190; DUMP: 	CallerEdges:
191; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
192; DUMP:         Clones: [[BAR2:0x[a-z0-9]+]]
193
194; DUMP: Node [[FOO2]]
195; DUMP: 	null Call
196; DUMP: 	AllocTypes: NotColdCold
197; DUMP: 	ContextIds: 3 4
198; DUMP: 	CalleeEdges:
199; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
200; DUMP: 	CallerEdges:
201; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
202; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
203
204; DUMP: Node [[MAIN1]]
205; DUMP: 	  %call = call noundef ptr @_Z3foov()	(clone 0)
206; DUMP: 	AllocTypes: NotCold
207; DUMP: 	ContextIds: 1 3
208; DUMP: 	CalleeEdges:
209; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
210; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
211; DUMP: 	CallerEdges:
212
213; DUMP: Node [[MAIN2]]
214; DUMP: 	  %call1 = call noundef ptr @_Z3foov()	(clone 0)
215; DUMP: 	AllocTypes: Cold
216; DUMP: 	ContextIds: 2 4
217; DUMP: 	CalleeEdges:
218; DUMP: 		Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
219; DUMP: 		Edge from Callee [[FOO3:0x[a-z0-9]+]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
220; DUMP: 	CallerEdges:
221
222; DUMP: Node [[BAZ]]
223; DUMP: 	  %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
224; DUMP: 	AllocTypes: NotColdCold
225; DUMP: 	ContextIds: 3 4
226; DUMP: 	CalleeEdges:
227; DUMP: 	CallerEdges:
228; DUMP: 		Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
229
230; DUMP: Node [[FOO]]
231; DUMP: 	  %call.i = call noundef ptr @_Z3barv()	(clone 0)
232; DUMP: 	AllocTypes: NotCold
233; DUMP: 	ContextIds: 1
234; DUMP: 	CalleeEdges:
235; DUMP: 		Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotCold ContextIds: 1
236; DUMP: 	CallerEdges:
237; DUMP: 		Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
238; DUMP:         Clones: [[FOO3]]
239
240; DUMP: Node [[FOO3]]
241; DUMP: 	  %call.i = call noundef ptr @_Z3barv()	(clone 0)
242; DUMP: 	AllocTypes: Cold
243; DUMP: 	ContextIds: 2
244; DUMP: 	CalleeEdges:
245; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2
246; DUMP: 	CallerEdges:
247; DUMP: 		Edge from Callee [[FOO3]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
248; DUMP:         Clone of [[FOO]]
249
250; DUMP: Node [[BAR2]]
251; DUMP: 	  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7	(clone 0)
252; DUMP: 	AllocTypes: Cold
253; DUMP: 	ContextIds: 2
254; DUMP: 	CalleeEdges:
255; DUMP: 	CallerEdges:
256; DUMP: 		Edge from Callee [[BAR2]] to Caller: [[FOO3]] AllocTypes: Cold ContextIds: 2
257; DUMP:         Clone of [[BAR]]
258
259
260; REMARKS: created clone _Z3barv.memprof.1
261; REMARKS: created clone _Z3foov.memprof.1
262; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
263; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3barv.memprof.1
264; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
265; REMARKS: call in clone main assigned to call function clone _Z3foov
266; REMARKS: call in clone _Z3foov assigned to call function clone _Z3barv
267; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
268; REMARKS: call in clone _Z3bazv marked with memprof allocation attribute notcold
269
270
271; IR: define internal {{.*}} @_Z3barv()
272; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
273; IR: define internal {{.*}} @_Z3foov()
274; IR:   call {{.*}} @_Z3barv()
275; IR: define {{.*}} @main()
276;; The first call to foo does not allocate cold memory. It should call the
277;; original functions, which ultimately call the original allocation decorated
278;; with a "notcold" attribute.
279; IR:   call {{.*}} @_Z3foov()
280;; The second call to foo allocates cold memory. It should call cloned functions
281;; which ultimately call a cloned allocation decorated with a "cold" attribute.
282; IR:   call {{.*}} @_Z3foov.memprof.1()
283; IR: define internal {{.*}} @_Z3barv.memprof.1()
284; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
285; IR: define internal {{.*}} @_Z3foov.memprof.1()
286; IR:   call {{.*}} @_Z3barv.memprof.1()
287; IR: attributes #[[NOTCOLD]] = { builtin "memprof"="notcold" }
288; IR: attributes #[[COLD]] = { builtin "memprof"="cold" }
289
290
291; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
292; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
293; STATS: 2 memprof-context-disambiguation - Number of function clones created during whole program analysis
294
295
296; DOT: digraph "postbuild" {
297; DOT: 	label="postbuild";
298; DOT: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
299; DOT: 	Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
300; DOT: 	Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
301; DOT: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
302; DOT: 	Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
303; DOT: 	Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
304; DOT: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
305; DOT: 	Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"];
306; DOT: 	Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"];
307; DOT: 	Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
308; DOT: 	Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
309; DOT: 	Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
310; DOT: }
311
312
313; DOTCLONED: digraph "cloned" {
314; DOTCLONED: 	label="cloned";
315; DOTCLONED: 	Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
316; DOTCLONED: 	Node[[FOO2:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
317; DOTCLONED: 	Node[[FOO2]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
318; DOTCLONED: 	Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
319; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO2]][tooltip="ContextIds: 3",fillcolor="brown1"];
320; DOTCLONED: 	Node[[MAIN1]] -> Node[[FOO:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
321; DOTCLONED: 	Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
322; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 4",fillcolor="cyan"];
323; DOTCLONED: 	Node[[MAIN2]] -> Node[[FOO3:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
324; DOTCLONED: 	Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
325; DOTCLONED: 	Node[[FOO]] [shape=record,tooltip="N[[FOO]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
326; DOTCLONED: 	Node[[FOO]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
327; DOTCLONED: 	Node[[FOO3]] [shape=record,tooltip="N[[FOO3]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
328; DOTCLONED: 	Node[[FOO3]] -> Node[[BAR2:0x[a-z0-9]+]][tooltip="ContextIds: 2",fillcolor="cyan"];
329; DOTCLONED: 	Node[[BAR2]] [shape=record,tooltip="N[[BAR2]] ContextIds: 2",fillcolor="cyan",style="filled",color="blue",style="filled,bold,dashed",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
330; DOTCLONED: }
331