xref: /llvm-project/llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll (revision 78a195e1002dbfdfaeb7b36d5699e58b47238cbb)
1;; Test context disambiguation for a callgraph containing multiple memprof
2;; contexts and no inlining, where we need to perform additional cloning
3;; during function assignment/cloning to handle the combination of contexts
4;; to 2 different allocations.
5;;
6;; void E(char **buf1, char **buf2) {
7;;   *buf1 = new char[10];
8;;   *buf2 = new char[10];
9;; }
10;;
11;; void B(char **buf1, char **buf2) {
12;;   E(buf1, buf2);
13;; }
14;;
15;; void C(char **buf1, char **buf2) {
16;;   E(buf1, buf2);
17;; }
18;;
19;; void D(char **buf1, char **buf2) {
20;;   E(buf1, buf2);
21;; }
22;; int main(int argc, char **argv) {
23;;   char *cold1, *cold2, *default1, *default2, *default3, *default4;
24;;   B(&default1, &default2);
25;;   C(&default3, &cold1);
26;;   D(&cold2, &default4);
27;;   memset(cold1, 0, 10);
28;;   memset(cold2, 0, 10);
29;;   memset(default1, 0, 10);
30;;   memset(default2, 0, 10);
31;;   memset(default3, 0, 10);
32;;   memset(default4, 0, 10);
33;;   delete[] default1;
34;;   delete[] default2;
35;;   delete[] default3;
36;;   delete[] default4;
37;;   sleep(10);
38;;   delete[] cold1;
39;;   delete[] cold2;
40;;   return 0;
41;; }
42;;
43;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
44;; memory freed after sleep(10) results in cold lifetimes.
45;;
46;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
47
48;; -stats requires asserts
49; REQUIRES: asserts
50
51
52; RUN: opt -thinlto-bc %s >%t.o
53; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
54; RUN:  -supports-hot-cold-new \
55; RUN:  -r=%t.o,main,plx \
56; RUN:  -r=%t.o,_ZdaPv, \
57; RUN:  -r=%t.o,sleep, \
58; RUN:  -r=%t.o,_Znam, \
59; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
60; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
61; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
62; RUN:  --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
63
64; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
65
66
67;; Try again but with distributed ThinLTO
68; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
69; RUN:  -supports-hot-cold-new \
70; RUN:  -thinlto-distributed-indexes \
71; RUN:  -r=%t.o,main,plx \
72; RUN:  -r=%t.o,_ZdaPv, \
73; RUN:  -r=%t.o,sleep, \
74; RUN:  -r=%t.o,_Znam, \
75; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
76; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
77; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
78; RUN:  --check-prefix=STATS
79
80;; Run ThinLTO backend
81; RUN: opt -passes=memprof-context-disambiguation \
82; RUN:  -memprof-import-summary=%t.o.thinlto.bc \
83; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
84; RUN:  %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \
85; RUN:  --check-prefix=STATS-BE --check-prefix=REMARKS
86
87
88source_filename = "funcassigncloning.ll"
89target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
90target triple = "x86_64-unknown-linux-gnu"
91
92; Function Attrs: noinline optnone
93define internal void @_Z1EPPcS0_(ptr %buf1, ptr %buf2) #0 { ; E - holds both profiled allocations; %buf1/%buf2 are never used in this reduced body
94entry:
95  %call = call ptr @_Znam(i64 noundef 10), !memprof !0, !callsite !7 ; first new[] - profile !0 is cold only for the context through !18 (D's call to E)
96  %call1 = call ptr @_Znam(i64 noundef 10), !memprof !8, !callsite !15 ; second new[] - profile !8 is cold only for the context through !17 (C's call to E)
97  ret void
98}
99
100declare ptr @_Znam(i64)
101
102define internal void @_Z1BPPcS0_() { ; B - caller of E whose context is notcold for both allocations
103entry:
104  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !16 ; stack id !16 is paired with "notcold" in both !0 (via !6) and !8 (via !10)
105  ret void
106}
107
108define internal void @_Z1CPPcS0_() { ; C - caller of E whose context is cold only for the second allocation
109entry:
110  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !17 ; stack id !17 is "notcold" in !0 (via !4) and "cold" in !8 (via !12)
111  ret void
112}
113
114define internal void @_Z1DPPcS0_() { ; D - caller of E whose context is cold only for the first allocation
115entry:
116  call void @_Z1EPPcS0_(ptr null, ptr null), !callsite !18 ; stack id !18 is "cold" in !0 (via !2) and "notcold" in !8 (via !14)
117  ret void
118}
119
120; Function Attrs: noinline optnone
121define i32 @main() #0 { ; entry point - calls B, C and D once each and returns 0
122entry:
123  call void @_Z1BPPcS0_() ; none of these three calls carries !callsite metadata
124  call void @_Z1CPPcS0_()
125  call void @_Z1DPPcS0_()
126  ret i32 0
127}
128
129declare void @_ZdaPv()
130
131declare i32 @sleep()
132
133; uselistorder directives
134uselistorder ptr @_Znam, { 1, 0 }
135
136attributes #0 = { noinline optnone }
137
138!0 = !{!1, !3, !5} ; !memprof profile attached to the first _Znam call in E (three contexts)
139!1 = !{!2, !"cold"}
140!2 = !{i64 -3461278137325233666, i64 -7799663586031895603} ; first allocation, reached through D's callsite (!18)
141!3 = !{!4, !"notcold"}
142!4 = !{i64 -3461278137325233666, i64 -3483158674395044949} ; first allocation, reached through C's callsite (!17)
143!5 = !{!6, !"notcold"}
144!6 = !{i64 -3461278137325233666, i64 -2441057035866683071} ; first allocation, reached through B's callsite (!16)
145!7 = !{i64 -3461278137325233666} ; !callsite id of the first _Znam call in E
146!8 = !{!9, !11, !13} ; !memprof profile attached to the second _Znam call in E (three contexts)
147!9 = !{!10, !"notcold"}
148!10 = !{i64 -1415475215210681400, i64 -2441057035866683071} ; second allocation, reached through B's callsite (!16)
149!11 = !{!12, !"cold"}
150!12 = !{i64 -1415475215210681400, i64 -3483158674395044949} ; second allocation, reached through C's callsite (!17)
151!13 = !{!14, !"notcold"}
152!14 = !{i64 -1415475215210681400, i64 -7799663586031895603} ; second allocation, reached through D's callsite (!18)
153!15 = !{i64 -1415475215210681400} ; !callsite id of the second _Znam call in E
154!16 = !{i64 -2441057035866683071} ; !callsite id of the call to E in B
155!17 = !{i64 -3483158674395044949} ; !callsite id of the call to E in C
156!18 = !{i64 -7799663586031895603} ; !callsite id of the call to E in D
157
158
159;; Originally we create a single clone of each call to new from E, since each
160;; allocates cold memory for a single caller.
161
162; DUMP: CCG after cloning:
163; DUMP: Callsite Context Graph:
164; DUMP: Node [[ENEW1ORIG:0x[a-z0-9]+]]
165; DUMP:         Versions: 1 MIB:
166; DUMP:                 AllocType 2 StackIds: 0
167; DUMP:                 AllocType 1 StackIds: 1
168; DUMP:                 AllocType 1 StackIds: 2
169; DUMP:         (clone 0)
170; DUMP: 	AllocTypes: NotCold
171; DUMP: 	ContextIds: 2 3
172; DUMP: 	CalleeEdges:
173; DUMP: 	CallerEdges:
174; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
175; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
176; DUMP: 	Clones: [[ENEW1CLONE:0x[a-z0-9]+]]
177
178; DUMP: Node [[D:0x[a-z0-9]+]]
179; DUMP: 	Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 0 (clone 0)
180; DUMP: 	AllocTypes: NotColdCold
181; DUMP: 	ContextIds: 1 6
182; DUMP: 	CalleeEdges:
183; DUMP: 		Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
184; DUMP: 		Edge from Callee [[ENEW2ORIG:0x[a-z0-9]+]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
185; DUMP: 	CallerEdges:
186
187; DUMP: Node [[C]]
188; DUMP: 	Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 1 (clone 0)
189; DUMP: 	AllocTypes: NotColdCold
190; DUMP: 	ContextIds: 2 5
191; DUMP: 	CalleeEdges:
192; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[C]] AllocTypes: NotCold ContextIds: 2
193; DUMP: 		Edge from Callee [[ENEW2CLONE:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
194; DUMP: 	CallerEdges:
195
196; DUMP: Node [[B]]
197; DUMP: 	Callee: 16147627620923572899 (_Z1EPPcS0_) Clones: 0 StackIds: 2 (clone 0)
198; DUMP: 	AllocTypes: NotCold
199; DUMP: 	ContextIds: 3 4
200; DUMP: 	CalleeEdges:
201; DUMP: 		Edge from Callee [[ENEW1ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 3
202; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
203; DUMP: 	CallerEdges:
204
205; DUMP: Node [[ENEW2ORIG]]
206; DUMP:         Versions: 1 MIB:
207; DUMP:                 AllocType 1 StackIds: 2
208; DUMP:                 AllocType 2 StackIds: 1
209; DUMP:                 AllocType 1 StackIds: 0
210; DUMP:         (clone 0)
211; DUMP: 	AllocTypes: NotCold
212; DUMP: 	ContextIds: 4 6
213; DUMP: 	CalleeEdges:
214; DUMP: 	CallerEdges:
215; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[B]] AllocTypes: NotCold ContextIds: 4
216; DUMP: 		Edge from Callee [[ENEW2ORIG]] to Caller: [[D]] AllocTypes: NotCold ContextIds: 6
217; DUMP: 	Clones: [[ENEW2CLONE]]
218
219; DUMP: Node [[ENEW1CLONE]]
220; DUMP:         Versions: 1 MIB:
221; DUMP:                 AllocType 2 StackIds: 0
222; DUMP:                 AllocType 1 StackIds: 1
223; DUMP:                 AllocType 1 StackIds: 2
224; DUMP:         (clone 0)
225; DUMP: 	AllocTypes: Cold
226; DUMP: 	ContextIds: 1
227; DUMP: 	CalleeEdges:
228; DUMP: 	CallerEdges:
229; DUMP: 		Edge from Callee [[ENEW1CLONE]] to Caller: [[D]] AllocTypes: Cold ContextIds: 1
230; DUMP: 	Clone of [[ENEW1ORIG]]
231
232; DUMP: Node [[ENEW2CLONE]]
233; DUMP:         Versions: 1 MIB:
234; DUMP:                 AllocType 1 StackIds: 2
235; DUMP:                 AllocType 2 StackIds: 1
236; DUMP:                 AllocType 1 StackIds: 0
237; DUMP:         (clone 0)
238; DUMP: 	AllocTypes: Cold
239; DUMP: 	ContextIds: 5
240; DUMP: 	CalleeEdges:
241; DUMP: 	CallerEdges:
242; DUMP: 		Edge from Callee [[ENEW2CLONE]] to Caller: [[C]] AllocTypes: Cold ContextIds: 5
243; DUMP: 	Clone of [[ENEW2ORIG]]
244
245
246;; We greedily create a clone of E that is initially used by the clones of the
247;; first call to new. However, we end up with an incompatible set of callers
248;; given the second call to new which has clones with a different combination of
249;; callers. Eventually, we create 2 more clones, and the first clone becomes dead.
250; REMARKS: created clone _Z1EPPcS0_.memprof.1
251; REMARKS: created clone _Z1EPPcS0_.memprof.2
252; REMARKS: created clone _Z1EPPcS0_.memprof.3
253; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
254; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute cold
255; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute notcold
256; REMARKS: call in clone _Z1EPPcS0_ marked with memprof allocation attribute notcold
257; REMARKS: call in clone _Z1EPPcS0_.memprof.2 marked with memprof allocation attribute notcold
258; REMARKS: call in clone _Z1EPPcS0_.memprof.3 marked with memprof allocation attribute cold
259; REMARKS: call in clone _Z1CPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.3
260; REMARKS: call in clone _Z1DPPcS0_ assigned to call function clone _Z1EPPcS0_.memprof.2
261
262
263;; Original version of E is used for the non-cold allocations, both from B.
264; IR: define internal {{.*}} @_Z1EPPcS0_(
265; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
266; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
267; IR: define internal {{.*}} @_Z1BPPcS0_(
268; IR:   call {{.*}} @_Z1EPPcS0_(
269;; C calls a clone of E with the first new allocating non-cold memory and the
270;; second allocating cold memory.
271; IR: define internal {{.*}} @_Z1CPPcS0_(
272; IR:   call {{.*}} @_Z1EPPcS0_.memprof.3(
273;; D calls a clone of E with the first new allocating cold memory and the
274;; second allocating non-cold memory.
275; IR: define internal {{.*}} @_Z1DPPcS0_(
276; IR:   call {{.*}} @_Z1EPPcS0_.memprof.2(
277; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2(
278; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
279; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
280; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3(
281; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
282; IR:   call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
283; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
284; IR: attributes #[[COLD]] = { "memprof"="cold" }
285
286
287; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
288; STATS-BE: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
289; STATS: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
290; STATS-BE: 4 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
291; STATS-BE: 8 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
292; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
293; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
294; STATS-BE: 1 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
295; STATS-BE: 4 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
296; STATS-BE: 2 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
297