xref: /llvm-project/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll (revision 78a195e1002dbfdfaeb7b36d5699e58b47238cbb)
1;; Test callsite context graph generation for call graph with MIBs
2;; that have pruned contexts that partially match multiple inlined
3;; callsite contexts, requiring duplication of context ids and nodes
4;; while matching callsite nodes onto the graph. Also tests graph and IR
5;; cloning.
6;;
7;; Original code looks like:
8;;
9;; char *D() {
10;;   return new char[10];
11;; }
12;;
13;; char *F() {
14;;   return D();
15;; }
16;;
17;; char *C() {
18;;   return D();
19;; }
20;;
21;; char *B() {
22;;   return C();
23;; }
24;;
25;; char *E() {
26;;   return C();
27;; }
28;; int main(int argc, char **argv) {
29;;   char *x = B(); // cold
30;;   char *y = E(); // cold
31;;   char *z = F(); // default
32;;   memset(x, 0, 10);
33;;   memset(y, 0, 10);
34;;   memset(z, 0, 10);
35;;   delete[] z;
36;;   sleep(10);
37;;   delete[] x;
38;;   delete[] y;
39;;   return 0;
40;; }
41;;
42;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
43;; memory freed after sleep(10) results in cold lifetimes.
44;;
45;; The code below was created by forcing inlining of C into both B and E.
46;; Since both allocation contexts via C are cold, the matched memprof
47;; metadata has the context pruned above C's callsite. This requires
48;; matching the stack node for C to callsites where it was inlined (i.e.
49;; the callsites in B and E that have callsite metadata that includes C's).
50;; It also requires duplication of that node in the graph as well as the
51;; duplication of the context ids along that path through the graph,
52;; so that we can represent the duplicated (via inlining) C callsite.
53;;
54;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
55
56;; -stats requires asserts
57; REQUIRES: asserts
58
59; RUN: opt -thinlto-bc %s >%t.o
60; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
61; RUN:	-supports-hot-cold-new \
62; RUN:  -r=%t.o,main,plx \
63; RUN:  -r=%t.o,_ZdaPv, \
64; RUN:  -r=%t.o,sleep, \
65; RUN:  -r=%t.o,_Znam, \
66; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
67; RUN:  -stats -pass-remarks=memprof-context-disambiguation -save-temps \
68; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
69; RUN:  --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
70
71; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
72
73
74;; Try again but with distributed ThinLTO
75; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
76; RUN:	-supports-hot-cold-new \
77; RUN:  -thinlto-distributed-indexes \
78; RUN:  -r=%t.o,main,plx \
79; RUN:  -r=%t.o,_ZdaPv, \
80; RUN:  -r=%t.o,sleep, \
81; RUN:  -r=%t.o,_Znam, \
82; RUN:  -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
83; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
84; RUN:  -o %t2.out 2>&1 | FileCheck %s --check-prefix=DUMP \
85; RUN:  --check-prefix=STATS
86
87
88;; Check distributed index
89; RUN: llvm-dis %t.o.thinlto.bc -o - | FileCheck %s --check-prefix=DISTRIB
90
91;; Run ThinLTO backend
92; RUN: opt -passes=memprof-context-disambiguation \
93; RUN:  -memprof-import-summary=%t.o.thinlto.bc \
94; RUN:  -stats -pass-remarks=memprof-context-disambiguation \
95; RUN:  %t.o -S 2>&1 | FileCheck %s --check-prefix=IR \
96; RUN:  --check-prefix=STATS-BE --check-prefix=REMARKS
97
98source_filename = "duplicate-context-ids.ll" ; NOTE(review): the guids expected in the distributed-index checks presumably depend on this name for internal functions - confirm before changing
99target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
100target triple = "x86_64-unknown-linux-gnu"
101
102define internal ptr @_Z1Dv() #0 { ; D() from the header comment: contains the profiled allocation call
103entry:
104  %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 ; !0 carries one "cold" and one "notcold" MIB; !5 is this call's own stack id
105  ret ptr null ; the allocation result is not returned in this reduced module
106}
107
108declare ptr @_Znam(i64) ; allocation function called from _Z1Dv; resolved via -r= in the llvm-lto2 commands
109
110define internal ptr @_Z1Fv() #0 { ; F() from the header comment: the not-cold path to D
111entry:
112  %call = call ptr @_Z1Dv(), !callsite !6 ; !6 is the second stack id of the "notcold" MIB (!4)
113  ret ptr null
114}
115
116define internal ptr @_Z1Cv() #0 { ; out-of-line C(); main below never calls it
117entry:
118  %call = call ptr @_Z1Dv(), !callsite !7 ; !7 is the second stack id of the "cold" MIB (!2)
119  ret ptr null
120}
121
122define internal ptr @_Z1Bv() #0 { ; B() after C was force-inlined into it (see header comment)
123entry:
124  %call.i = call ptr @_Z1Dv(), !callsite !8 ; calls D directly; !8 starts with C's stack id (same as !7) and adds one more id
125  ret ptr null
126}
127
128define internal ptr @_Z1Ev() #0 { ; E() after C was force-inlined into it (see header comment)
129entry:
130  %call.i = call ptr @_Z1Dv(), !callsite !9 ; calls D directly; !9 starts with C's stack id (same as !7) and adds one more id
131  ret ptr null
132}
133
134define i32 @main() #0 { ; entry point; calls B, E and F and ignores their results
135entry:
136  call ptr @_Z1Bv() ; cold path per the header comment (x = B())
137  call ptr @_Z1Ev() ; cold path per the header comment (y = E())
138  call ptr @_Z1Fv() ; default (not cold) path per the header comment (z = F())
139  ret i32 0
140}
141
142declare void @_ZdaPv() ; never called in this module; still named in the -r= resolutions of the llvm-lto2 commands
143
144declare i32 @sleep() ; never called in this module; still named in the -r= resolutions of the llvm-lto2 commands
145
146attributes #0 = { noinline optnone} ; attached to every function defined in this module
147
148!0 = !{!1, !3} ; MIB list attached through !memprof to the _Znam call in _Z1Dv
149!1 = !{!2, !"cold"} ; MIB with call stack !2 and allocation type "cold"
150!2 = !{i64 6541423618768552252, i64 -6270142974039008131} ; stack ids: the allocation call (!5), then the D call in C (!7)
151!3 = !{!4, !"notcold"} ; MIB with call stack !4 and allocation type "notcold"
152!4 = !{i64 6541423618768552252, i64 -4903163940066524832} ; stack ids: the allocation call (!5), then the D call in F (!6)
153!5 = !{i64 6541423618768552252} ; !callsite of the _Znam call in _Z1Dv
154!6 = !{i64 -4903163940066524832} ; !callsite of the _Z1Dv call in _Z1Fv
155!7 = !{i64 -6270142974039008131} ; !callsite of the _Z1Dv call in _Z1Cv
156!8 = !{i64 -6270142974039008131, i64 -184525619819294889} ; !callsite in _Z1Bv: C's id, then presumably B's call to C (inlined) - confirm
157!9 = !{i64 -6270142974039008131, i64 1905834578520680781} ; !callsite in _Z1Ev: C's id, then presumably E's call to C (inlined) - confirm
158
159
160;; After adding only the alloc node memprof metadata, we only have 2 contexts.
161
162; DUMP: CCG before updating call stack chains:
163; DUMP: Callsite Context Graph:
164; DUMP: Node [[D:0x[a-z0-9]+]]
165; DUMP: 	Versions: 1 MIB:
166; DUMP: 		AllocType 2 StackIds: 0
167; DUMP: 		AllocType 1 StackIds: 1
168; DUMP: 	(clone 0)
169; DUMP: 	AllocTypes: NotColdCold
170; DUMP: 	ContextIds: 1 2
171; DUMP: 	CalleeEdges:
172; DUMP: 	CallerEdges:
173; DUMP: 		Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
174; DUMP: 		Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
175
176;; After updating for callsite metadata, we should have generated context ids 3 and 4,
177;; along with 2 new nodes for those callsites. All have the same allocation type
178;; behavior as the original C node.
179
180; DUMP: CCG before cloning:
181; DUMP: Callsite Context Graph:
182; DUMP: Node [[D]]
183; DUMP: 	Versions: 1 MIB:
184; DUMP: 		AllocType 2 StackIds: 0
185; DUMP: 		AllocType 1 StackIds: 1
186; DUMP: 	(clone 0)
187; DUMP: 	AllocTypes: NotColdCold
188; DUMP: 	ContextIds: 1 2 3 4
189; DUMP: 	CalleeEdges:
190; DUMP: 	CallerEdges:
191; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
192; DUMP: 		Edge from Callee [[D]] to Caller: [[C1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
193; DUMP: 		Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
194; DUMP: 		Edge from Callee [[D]] to Caller: [[C0:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
195
196; DUMP: CCG after cloning:
197; DUMP: Callsite Context Graph:
198; DUMP: Node [[D]]
199; DUMP:         Versions: 1 MIB:
200; DUMP:                 AllocType 2 StackIds: 0
201; DUMP:                 AllocType 1 StackIds: 1
202; DUMP:         (clone 0)
203; DUMP: 	AllocTypes: NotCold
204; DUMP: 	ContextIds: 2
205; DUMP: 	CalleeEdges:
206; DUMP: 	CallerEdges:
207; DUMP: 		Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
208; DUMP:         Clones: [[D2:0x[a-z0-9]+]]
209
210; DUMP: Node [[D2]]
211; DUMP:         Versions: 1 MIB:
212; DUMP:                 AllocType 2 StackIds: 0
213; DUMP:                 AllocType 1 StackIds: 1
214; DUMP:         (clone 0)
215; DUMP: 	AllocTypes: Cold
216; DUMP: 	ContextIds: 1 3 4
217; DUMP: 	CalleeEdges:
218; DUMP: 	CallerEdges:
219; DUMP: 		Edge from Callee [[D2]] to Caller: [[C0]] AllocTypes: Cold ContextIds: 1
220; DUMP: 		Edge from Callee [[D2]] to Caller: [[C1]] AllocTypes: Cold ContextIds: 3
221; DUMP: 		Edge from Callee [[D2]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 4
222; DUMP:         Clone of [[D]]
223
224; REMARKS: created clone _Z1Dv.memprof.1
225; REMARKS: call in clone _Z1Dv marked with memprof allocation attribute notcold
226; REMARKS: call in clone _Z1Dv.memprof.1 marked with memprof allocation attribute cold
227; REMARKS: call in clone _Z1Bv assigned to call function clone _Z1Dv.memprof.1
228; REMARKS: call in clone _Z1Ev assigned to call function clone _Z1Dv.memprof.1
229
230
231;; The allocation via F does not allocate cold memory. It should call the
232;; original D, which ultimately calls the original allocation decorated
233;; with a "notcold" attribute.
234; IR: define internal {{.*}} @_Z1Dv()
235; IR:   call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
236; IR: define internal {{.*}} @_Z1Fv()
237; IR:   call {{.*}} @_Z1Dv()
238;; The allocations via B and E allocate cold memory. They should call the
239;; cloned D, which ultimately calls the cloned allocation decorated with a
240;; "cold" attribute.
241; IR: define internal {{.*}} @_Z1Bv()
242; IR:   call {{.*}} @_Z1Dv.memprof.1()
243; IR: define internal {{.*}} @_Z1Ev()
244; IR:   call {{.*}} @_Z1Dv.memprof.1()
245; IR: define internal {{.*}} @_Z1Dv.memprof.1()
246; IR:   call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
247; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
248; IR: attributes #[[COLD]] = { "memprof"="cold" }
249
250
251; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
252; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
253; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
254; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
255; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
256; STATS: 1 memprof-context-disambiguation - Number of function clones created during whole program analysis
257; STATS-BE: 1 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
258; STATS-BE: 1 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
259; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
260; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
261
262
263; DISTRIB: ^[[E:[0-9]+]] = gv: (guid: 331966645857188136, {{.*}} callsites: ((callee: ^[[D:[0-9]+]], clones: (1)
264; DISTRIB: ^[[D]] = gv: (guid: 11079124245221721799, {{.*}} allocs: ((versions: (notcold, cold)
265; DISTRIB: ^[[F:[0-9]+]] = gv: (guid: 11254287701717398916, {{.*}} callsites: ((callee: ^[[D]], clones: (0)
266; DISTRIB: ^[[B:[0-9]+]] = gv: (guid: 13579056193435805313, {{.*}} callsites: ((callee: ^[[D]], clones: (1)
267; DISTRIB: ^[[C:[0-9]+]] = gv: (guid: 15101436305866936160, {{.*}} callsites: ((callee: ^[[D]], clones: (1)
268