1*a9fa9459Szrj /* cg_print.c - Print routines for displaying call graphs.
2*a9fa9459Szrj
3*a9fa9459Szrj Copyright (C) 2000-2016 Free Software Foundation, Inc.
4*a9fa9459Szrj
5*a9fa9459Szrj This file is part of GNU Binutils.
6*a9fa9459Szrj
7*a9fa9459Szrj This program is free software; you can redistribute it and/or modify
8*a9fa9459Szrj it under the terms of the GNU General Public License as published by
9*a9fa9459Szrj the Free Software Foundation; either version 3 of the License, or
10*a9fa9459Szrj (at your option) any later version.
11*a9fa9459Szrj
12*a9fa9459Szrj This program is distributed in the hope that it will be useful,
13*a9fa9459Szrj but WITHOUT ANY WARRANTY; without even the implied warranty of
14*a9fa9459Szrj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15*a9fa9459Szrj GNU General Public License for more details.
16*a9fa9459Szrj
17*a9fa9459Szrj You should have received a copy of the GNU General Public License
18*a9fa9459Szrj along with this program; if not, write to the Free Software
19*a9fa9459Szrj Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20*a9fa9459Szrj 02110-1301, USA. */
21*a9fa9459Szrj
22*a9fa9459Szrj #include "gprof.h"
23*a9fa9459Szrj #include "libiberty.h"
24*a9fa9459Szrj #include "filenames.h"
25*a9fa9459Szrj #include "search_list.h"
26*a9fa9459Szrj #include "source.h"
27*a9fa9459Szrj #include "symtab.h"
28*a9fa9459Szrj #include "cg_arcs.h"
29*a9fa9459Szrj #include "cg_print.h"
30*a9fa9459Szrj #include "hist.h"
31*a9fa9459Szrj #include "utils.h"
32*a9fa9459Szrj #include "corefile.h"
33*a9fa9459Szrj
34*a9fa9459Szrj /* Return value of comparison functions used to sort tables. */
35*a9fa9459Szrj #define LESSTHAN -1
36*a9fa9459Szrj #define EQUALTO 0
37*a9fa9459Szrj #define GREATERTHAN 1
38*a9fa9459Szrj
39*a9fa9459Szrj static void print_header (void);
40*a9fa9459Szrj static void print_cycle (Sym *);
41*a9fa9459Szrj static int cmp_member (Sym *, Sym *);
42*a9fa9459Szrj static void sort_members (Sym *);
43*a9fa9459Szrj static void print_members (Sym *);
44*a9fa9459Szrj static int cmp_arc (Arc *, Arc *);
45*a9fa9459Szrj static void sort_parents (Sym *);
46*a9fa9459Szrj static void print_parents (Sym *);
47*a9fa9459Szrj static void sort_children (Sym *);
48*a9fa9459Szrj static void print_children (Sym *);
49*a9fa9459Szrj static void print_line (Sym *);
50*a9fa9459Szrj static int cmp_name (const PTR, const PTR);
51*a9fa9459Szrj static int cmp_arc_count (const PTR, const PTR);
52*a9fa9459Szrj static int cmp_fun_nuses (const PTR, const PTR);
53*a9fa9459Szrj static void order_and_dump_functions_by_arcs
54*a9fa9459Szrj (Arc **, unsigned long, int, Arc **, unsigned long *);
55*a9fa9459Szrj
56*a9fa9459Szrj /* Declarations of automatically generated functions to output blurbs. */
57*a9fa9459Szrj extern void bsd_callg_blurb (FILE * fp);
58*a9fa9459Szrj extern void fsf_callg_blurb (FILE * fp);
59*a9fa9459Szrj
60*a9fa9459Szrj double print_time = 0.0;
61*a9fa9459Szrj
62*a9fa9459Szrj
63*a9fa9459Szrj static void
print_header(void)64*a9fa9459Szrj print_header (void)
65*a9fa9459Szrj {
66*a9fa9459Szrj if (first_output)
67*a9fa9459Szrj first_output = FALSE;
68*a9fa9459Szrj else
69*a9fa9459Szrj printf ("\f\n");
70*a9fa9459Szrj
71*a9fa9459Szrj if (!bsd_style_output)
72*a9fa9459Szrj {
73*a9fa9459Szrj if (print_descriptions)
74*a9fa9459Szrj printf (_("\t\t Call graph (explanation follows)\n\n"));
75*a9fa9459Szrj else
76*a9fa9459Szrj printf (_("\t\t\tCall graph\n\n"));
77*a9fa9459Szrj }
78*a9fa9459Szrj
79*a9fa9459Szrj printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80*a9fa9459Szrj (long) hist_scale * (long) sizeof (UNIT));
81*a9fa9459Szrj
82*a9fa9459Szrj if (print_time > 0.0)
83*a9fa9459Szrj printf (_(" for %.2f%% of %.2f seconds\n\n"),
84*a9fa9459Szrj 100.0 / print_time, print_time / hz);
85*a9fa9459Szrj else
86*a9fa9459Szrj {
87*a9fa9459Szrj printf (_(" no time propagated\n\n"));
88*a9fa9459Szrj
89*a9fa9459Szrj /* This doesn't hurt, since all the numerators will be 0.0. */
90*a9fa9459Szrj print_time = 1.0;
91*a9fa9459Szrj }
92*a9fa9459Szrj
93*a9fa9459Szrj if (bsd_style_output)
94*a9fa9459Szrj {
95*a9fa9459Szrj printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
96*a9fa9459Szrj "", "", "", "", _("called"), _("total"), _("parents"));
97*a9fa9459Szrj printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
98*a9fa9459Szrj _("index"),
99*a9fa9459Szrj /* xgettext:no-c-format */
100*a9fa9459Szrj _("%time"),
101*a9fa9459Szrj _("self"), _("descendants"), _("called"), _("self"),
102*a9fa9459Szrj _("name"), _("index"));
103*a9fa9459Szrj printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
104*a9fa9459Szrj "", "", "", "", _("called"), _("total"), _("children"));
105*a9fa9459Szrj printf ("\n");
106*a9fa9459Szrj }
107*a9fa9459Szrj else
108*a9fa9459Szrj {
109*a9fa9459Szrj printf (_("index %% time self children called name\n"));
110*a9fa9459Szrj }
111*a9fa9459Szrj }
112*a9fa9459Szrj
113*a9fa9459Szrj /* Print a cycle header. */
114*a9fa9459Szrj
115*a9fa9459Szrj static void
print_cycle(Sym * cyc)116*a9fa9459Szrj print_cycle (Sym *cyc)
117*a9fa9459Szrj {
118*a9fa9459Szrj char buf[BUFSIZ];
119*a9fa9459Szrj
120*a9fa9459Szrj sprintf (buf, "[%d]", cyc->cg.index);
121*a9fa9459Szrj printf (bsd_style_output
122*a9fa9459Szrj ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
123*a9fa9459Szrj : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
124*a9fa9459Szrj 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
125*a9fa9459Szrj cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
126*a9fa9459Szrj
127*a9fa9459Szrj if (cyc->cg.self_calls != 0)
128*a9fa9459Szrj printf ("+%-7lu", cyc->cg.self_calls);
129*a9fa9459Szrj else
130*a9fa9459Szrj printf (" %7.7s", "");
131*a9fa9459Szrj
132*a9fa9459Szrj printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
133*a9fa9459Szrj }
134*a9fa9459Szrj
135*a9fa9459Szrj /* Compare LEFT and RIGHT membmer. Major comparison key is
136*a9fa9459Szrj CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
137*a9fa9459Szrj
138*a9fa9459Szrj static int
cmp_member(Sym * left,Sym * right)139*a9fa9459Szrj cmp_member (Sym *left, Sym *right)
140*a9fa9459Szrj {
141*a9fa9459Szrj double left_time = left->cg.prop.self + left->cg.prop.child;
142*a9fa9459Szrj double right_time = right->cg.prop.self + right->cg.prop.child;
143*a9fa9459Szrj unsigned long left_calls = left->ncalls + left->cg.self_calls;
144*a9fa9459Szrj unsigned long right_calls = right->ncalls + right->cg.self_calls;
145*a9fa9459Szrj
146*a9fa9459Szrj if (left_time > right_time)
147*a9fa9459Szrj return GREATERTHAN;
148*a9fa9459Szrj
149*a9fa9459Szrj if (left_time < right_time)
150*a9fa9459Szrj return LESSTHAN;
151*a9fa9459Szrj
152*a9fa9459Szrj if (left_calls > right_calls)
153*a9fa9459Szrj return GREATERTHAN;
154*a9fa9459Szrj
155*a9fa9459Szrj if (left_calls < right_calls)
156*a9fa9459Szrj return LESSTHAN;
157*a9fa9459Szrj
158*a9fa9459Szrj return EQUALTO;
159*a9fa9459Szrj }
160*a9fa9459Szrj
161*a9fa9459Szrj /* Sort members of a cycle. */
162*a9fa9459Szrj
163*a9fa9459Szrj static void
sort_members(Sym * cyc)164*a9fa9459Szrj sort_members (Sym *cyc)
165*a9fa9459Szrj {
166*a9fa9459Szrj Sym *todo, *doing, *prev;
167*a9fa9459Szrj
168*a9fa9459Szrj /* Detach cycle members from cyclehead,
169*a9fa9459Szrj and insertion sort them back on. */
170*a9fa9459Szrj todo = cyc->cg.cyc.next;
171*a9fa9459Szrj cyc->cg.cyc.next = 0;
172*a9fa9459Szrj
173*a9fa9459Szrj for (doing = todo; doing != NULL; doing = todo)
174*a9fa9459Szrj {
175*a9fa9459Szrj todo = doing->cg.cyc.next;
176*a9fa9459Szrj
177*a9fa9459Szrj for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
178*a9fa9459Szrj {
179*a9fa9459Szrj if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
180*a9fa9459Szrj break;
181*a9fa9459Szrj }
182*a9fa9459Szrj
183*a9fa9459Szrj doing->cg.cyc.next = prev->cg.cyc.next;
184*a9fa9459Szrj prev->cg.cyc.next = doing;
185*a9fa9459Szrj }
186*a9fa9459Szrj }
187*a9fa9459Szrj
188*a9fa9459Szrj /* Print the members of a cycle. */
189*a9fa9459Szrj
190*a9fa9459Szrj static void
print_members(Sym * cyc)191*a9fa9459Szrj print_members (Sym *cyc)
192*a9fa9459Szrj {
193*a9fa9459Szrj Sym *member;
194*a9fa9459Szrj
195*a9fa9459Szrj sort_members (cyc);
196*a9fa9459Szrj
197*a9fa9459Szrj for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
198*a9fa9459Szrj {
199*a9fa9459Szrj printf (bsd_style_output
200*a9fa9459Szrj ? "%6.6s %5.5s %7.2f %11.2f %7lu"
201*a9fa9459Szrj : "%6.6s %5.5s %7.2f %7.2f %7lu",
202*a9fa9459Szrj "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
203*a9fa9459Szrj member->ncalls);
204*a9fa9459Szrj
205*a9fa9459Szrj if (member->cg.self_calls != 0)
206*a9fa9459Szrj printf ("+%-7lu", member->cg.self_calls);
207*a9fa9459Szrj else
208*a9fa9459Szrj printf (" %7.7s", "");
209*a9fa9459Szrj
210*a9fa9459Szrj printf (" ");
211*a9fa9459Szrj print_name (member);
212*a9fa9459Szrj printf ("\n");
213*a9fa9459Szrj }
214*a9fa9459Szrj }
215*a9fa9459Szrj
216*a9fa9459Szrj /* Compare two arcs to/from the same child/parent.
217*a9fa9459Szrj - if one arc is a self arc, it's least.
218*a9fa9459Szrj - if one arc is within a cycle, it's less than.
219*a9fa9459Szrj - if both arcs are within a cycle, compare arc counts.
220*a9fa9459Szrj - if neither arc is within a cycle, compare with
221*a9fa9459Szrj time + child_time as major key
222*a9fa9459Szrj arc count as minor key. */
223*a9fa9459Szrj
224*a9fa9459Szrj static int
cmp_arc(Arc * left,Arc * right)225*a9fa9459Szrj cmp_arc (Arc *left, Arc *right)
226*a9fa9459Szrj {
227*a9fa9459Szrj Sym *left_parent = left->parent;
228*a9fa9459Szrj Sym *left_child = left->child;
229*a9fa9459Szrj Sym *right_parent = right->parent;
230*a9fa9459Szrj Sym *right_child = right->child;
231*a9fa9459Szrj double left_time, right_time;
232*a9fa9459Szrj
233*a9fa9459Szrj DBG (TIMEDEBUG,
234*a9fa9459Szrj printf ("[cmp_arc] ");
235*a9fa9459Szrj print_name (left_parent);
236*a9fa9459Szrj printf (" calls ");
237*a9fa9459Szrj print_name (left_child);
238*a9fa9459Szrj printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
239*a9fa9459Szrj left->count, left_child->ncalls);
240*a9fa9459Szrj printf ("[cmp_arc] ");
241*a9fa9459Szrj print_name (right_parent);
242*a9fa9459Szrj printf (" calls ");
243*a9fa9459Szrj print_name (right_child);
244*a9fa9459Szrj printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
245*a9fa9459Szrj right->count, right_child->ncalls);
246*a9fa9459Szrj printf ("\n");
247*a9fa9459Szrj );
248*a9fa9459Szrj
249*a9fa9459Szrj if (left_parent == left_child)
250*a9fa9459Szrj return LESSTHAN; /* Left is a self call. */
251*a9fa9459Szrj
252*a9fa9459Szrj if (right_parent == right_child)
253*a9fa9459Szrj return GREATERTHAN; /* Right is a self call. */
254*a9fa9459Szrj
255*a9fa9459Szrj if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
256*a9fa9459Szrj && left_parent->cg.cyc.num == left_child->cg.cyc.num)
257*a9fa9459Szrj {
258*a9fa9459Szrj /* Left is a call within a cycle. */
259*a9fa9459Szrj if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
260*a9fa9459Szrj && right_parent->cg.cyc.num == right_child->cg.cyc.num)
261*a9fa9459Szrj {
262*a9fa9459Szrj /* Right is a call within the cycle, too. */
263*a9fa9459Szrj if (left->count < right->count)
264*a9fa9459Szrj return LESSTHAN;
265*a9fa9459Szrj
266*a9fa9459Szrj if (left->count > right->count)
267*a9fa9459Szrj return GREATERTHAN;
268*a9fa9459Szrj
269*a9fa9459Szrj return EQUALTO;
270*a9fa9459Szrj }
271*a9fa9459Szrj else
272*a9fa9459Szrj {
273*a9fa9459Szrj /* Right isn't a call within the cycle. */
274*a9fa9459Szrj return LESSTHAN;
275*a9fa9459Szrj }
276*a9fa9459Szrj }
277*a9fa9459Szrj else
278*a9fa9459Szrj {
279*a9fa9459Szrj /* Left isn't a call within a cycle. */
280*a9fa9459Szrj if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
281*a9fa9459Szrj && right_parent->cg.cyc.num == right_child->cg.cyc.num)
282*a9fa9459Szrj {
283*a9fa9459Szrj /* Right is a call within a cycle. */
284*a9fa9459Szrj return GREATERTHAN;
285*a9fa9459Szrj }
286*a9fa9459Szrj else
287*a9fa9459Szrj {
288*a9fa9459Szrj /* Neither is a call within a cycle. */
289*a9fa9459Szrj left_time = left->time + left->child_time;
290*a9fa9459Szrj right_time = right->time + right->child_time;
291*a9fa9459Szrj
292*a9fa9459Szrj if (left_time < right_time)
293*a9fa9459Szrj return LESSTHAN;
294*a9fa9459Szrj
295*a9fa9459Szrj if (left_time > right_time)
296*a9fa9459Szrj return GREATERTHAN;
297*a9fa9459Szrj
298*a9fa9459Szrj if (left->count < right->count)
299*a9fa9459Szrj return LESSTHAN;
300*a9fa9459Szrj
301*a9fa9459Szrj if (left->count > right->count)
302*a9fa9459Szrj return GREATERTHAN;
303*a9fa9459Szrj
304*a9fa9459Szrj return EQUALTO;
305*a9fa9459Szrj }
306*a9fa9459Szrj }
307*a9fa9459Szrj }
308*a9fa9459Szrj
309*a9fa9459Szrj
310*a9fa9459Szrj static void
sort_parents(Sym * child)311*a9fa9459Szrj sort_parents (Sym * child)
312*a9fa9459Szrj {
313*a9fa9459Szrj Arc *arc, *detached, sorted, *prev;
314*a9fa9459Szrj
315*a9fa9459Szrj /* Unlink parents from child, then insertion sort back on to
316*a9fa9459Szrj sorted's parents.
317*a9fa9459Szrj *arc the arc you have detached and are inserting.
318*a9fa9459Szrj *detached the rest of the arcs to be sorted.
319*a9fa9459Szrj sorted arc list onto which you insertion sort.
320*a9fa9459Szrj *prev arc before the arc you are comparing. */
321*a9fa9459Szrj sorted.next_parent = 0;
322*a9fa9459Szrj
323*a9fa9459Szrj for (arc = child->cg.parents; arc; arc = detached)
324*a9fa9459Szrj {
325*a9fa9459Szrj detached = arc->next_parent;
326*a9fa9459Szrj
327*a9fa9459Szrj /* Consider *arc as disconnected; insert it into sorted. */
328*a9fa9459Szrj for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
329*a9fa9459Szrj {
330*a9fa9459Szrj if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
331*a9fa9459Szrj break;
332*a9fa9459Szrj }
333*a9fa9459Szrj
334*a9fa9459Szrj arc->next_parent = prev->next_parent;
335*a9fa9459Szrj prev->next_parent = arc;
336*a9fa9459Szrj }
337*a9fa9459Szrj
338*a9fa9459Szrj /* Reattach sorted arcs to child. */
339*a9fa9459Szrj child->cg.parents = sorted.next_parent;
340*a9fa9459Szrj }
341*a9fa9459Szrj
342*a9fa9459Szrj
343*a9fa9459Szrj static void
print_parents(Sym * child)344*a9fa9459Szrj print_parents (Sym *child)
345*a9fa9459Szrj {
346*a9fa9459Szrj Sym *parent;
347*a9fa9459Szrj Arc *arc;
348*a9fa9459Szrj Sym *cycle_head;
349*a9fa9459Szrj
350*a9fa9459Szrj if (child->cg.cyc.head != 0)
351*a9fa9459Szrj cycle_head = child->cg.cyc.head;
352*a9fa9459Szrj else
353*a9fa9459Szrj cycle_head = child;
354*a9fa9459Szrj
355*a9fa9459Szrj if (!child->cg.parents)
356*a9fa9459Szrj {
357*a9fa9459Szrj printf (bsd_style_output
358*a9fa9459Szrj ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
359*a9fa9459Szrj : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
360*a9fa9459Szrj "", "", "", "", "", "");
361*a9fa9459Szrj return;
362*a9fa9459Szrj }
363*a9fa9459Szrj
364*a9fa9459Szrj sort_parents (child);
365*a9fa9459Szrj
366*a9fa9459Szrj for (arc = child->cg.parents; arc; arc = arc->next_parent)
367*a9fa9459Szrj {
368*a9fa9459Szrj parent = arc->parent;
369*a9fa9459Szrj if (child == parent || (child->cg.cyc.num != 0
370*a9fa9459Szrj && parent->cg.cyc.num == child->cg.cyc.num))
371*a9fa9459Szrj {
372*a9fa9459Szrj /* Selfcall or call among siblings. */
373*a9fa9459Szrj printf (bsd_style_output
374*a9fa9459Szrj ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
375*a9fa9459Szrj : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
376*a9fa9459Szrj "", "", "", "",
377*a9fa9459Szrj arc->count, "");
378*a9fa9459Szrj print_name (parent);
379*a9fa9459Szrj printf ("\n");
380*a9fa9459Szrj }
381*a9fa9459Szrj else
382*a9fa9459Szrj {
383*a9fa9459Szrj /* Regular parent of child. */
384*a9fa9459Szrj printf (bsd_style_output
385*a9fa9459Szrj ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
386*a9fa9459Szrj : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
387*a9fa9459Szrj "", "",
388*a9fa9459Szrj arc->time / hz, arc->child_time / hz,
389*a9fa9459Szrj arc->count, cycle_head->ncalls);
390*a9fa9459Szrj print_name (parent);
391*a9fa9459Szrj printf ("\n");
392*a9fa9459Szrj }
393*a9fa9459Szrj }
394*a9fa9459Szrj }
395*a9fa9459Szrj
396*a9fa9459Szrj
397*a9fa9459Szrj static void
sort_children(Sym * parent)398*a9fa9459Szrj sort_children (Sym *parent)
399*a9fa9459Szrj {
400*a9fa9459Szrj Arc *arc, *detached, sorted, *prev;
401*a9fa9459Szrj
402*a9fa9459Szrj /* Unlink children from parent, then insertion sort back on to
403*a9fa9459Szrj sorted's children.
404*a9fa9459Szrj *arc the arc you have detached and are inserting.
405*a9fa9459Szrj *detached the rest of the arcs to be sorted.
406*a9fa9459Szrj sorted arc list onto which you insertion sort.
407*a9fa9459Szrj *prev arc before the arc you are comparing. */
408*a9fa9459Szrj sorted.next_child = 0;
409*a9fa9459Szrj
410*a9fa9459Szrj for (arc = parent->cg.children; arc; arc = detached)
411*a9fa9459Szrj {
412*a9fa9459Szrj detached = arc->next_child;
413*a9fa9459Szrj
414*a9fa9459Szrj /* Consider *arc as disconnected; insert it into sorted. */
415*a9fa9459Szrj for (prev = &sorted; prev->next_child; prev = prev->next_child)
416*a9fa9459Szrj {
417*a9fa9459Szrj if (cmp_arc (arc, prev->next_child) != LESSTHAN)
418*a9fa9459Szrj break;
419*a9fa9459Szrj }
420*a9fa9459Szrj
421*a9fa9459Szrj arc->next_child = prev->next_child;
422*a9fa9459Szrj prev->next_child = arc;
423*a9fa9459Szrj }
424*a9fa9459Szrj
425*a9fa9459Szrj /* Reattach sorted children to parent. */
426*a9fa9459Szrj parent->cg.children = sorted.next_child;
427*a9fa9459Szrj }
428*a9fa9459Szrj
429*a9fa9459Szrj
430*a9fa9459Szrj static void
print_children(Sym * parent)431*a9fa9459Szrj print_children (Sym *parent)
432*a9fa9459Szrj {
433*a9fa9459Szrj Sym *child;
434*a9fa9459Szrj Arc *arc;
435*a9fa9459Szrj
436*a9fa9459Szrj sort_children (parent);
437*a9fa9459Szrj arc = parent->cg.children;
438*a9fa9459Szrj
439*a9fa9459Szrj for (arc = parent->cg.children; arc; arc = arc->next_child)
440*a9fa9459Szrj {
441*a9fa9459Szrj child = arc->child;
442*a9fa9459Szrj if (child == parent || (child->cg.cyc.num != 0
443*a9fa9459Szrj && child->cg.cyc.num == parent->cg.cyc.num))
444*a9fa9459Szrj {
445*a9fa9459Szrj /* Self call or call to sibling. */
446*a9fa9459Szrj printf (bsd_style_output
447*a9fa9459Szrj ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
448*a9fa9459Szrj : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
449*a9fa9459Szrj "", "", "", "", arc->count, "");
450*a9fa9459Szrj print_name (child);
451*a9fa9459Szrj printf ("\n");
452*a9fa9459Szrj }
453*a9fa9459Szrj else
454*a9fa9459Szrj {
455*a9fa9459Szrj /* Regular child of parent. */
456*a9fa9459Szrj printf (bsd_style_output
457*a9fa9459Szrj ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
458*a9fa9459Szrj : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
459*a9fa9459Szrj "", "",
460*a9fa9459Szrj arc->time / hz, arc->child_time / hz,
461*a9fa9459Szrj arc->count, child->cg.cyc.head->ncalls);
462*a9fa9459Szrj print_name (child);
463*a9fa9459Szrj printf ("\n");
464*a9fa9459Szrj }
465*a9fa9459Szrj }
466*a9fa9459Szrj }
467*a9fa9459Szrj
468*a9fa9459Szrj
469*a9fa9459Szrj static void
print_line(Sym * np)470*a9fa9459Szrj print_line (Sym *np)
471*a9fa9459Szrj {
472*a9fa9459Szrj char buf[BUFSIZ];
473*a9fa9459Szrj
474*a9fa9459Szrj sprintf (buf, "[%d]", np->cg.index);
475*a9fa9459Szrj printf (bsd_style_output
476*a9fa9459Szrj ? "%-6.6s %5.1f %7.2f %11.2f"
477*a9fa9459Szrj : "%-6.6s %5.1f %7.2f %7.2f", buf,
478*a9fa9459Szrj 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
479*a9fa9459Szrj np->cg.prop.self / hz, np->cg.prop.child / hz);
480*a9fa9459Szrj
481*a9fa9459Szrj if ((np->ncalls + np->cg.self_calls) != 0)
482*a9fa9459Szrj {
483*a9fa9459Szrj printf (" %7lu", np->ncalls);
484*a9fa9459Szrj
485*a9fa9459Szrj if (np->cg.self_calls != 0)
486*a9fa9459Szrj printf ("+%-7lu ", np->cg.self_calls);
487*a9fa9459Szrj else
488*a9fa9459Szrj printf (" %7.7s ", "");
489*a9fa9459Szrj }
490*a9fa9459Szrj else
491*a9fa9459Szrj {
492*a9fa9459Szrj printf (" %7.7s %7.7s ", "", "");
493*a9fa9459Szrj }
494*a9fa9459Szrj
495*a9fa9459Szrj print_name (np);
496*a9fa9459Szrj printf ("\n");
497*a9fa9459Szrj }
498*a9fa9459Szrj
499*a9fa9459Szrj
500*a9fa9459Szrj /* Print dynamic call graph. */
501*a9fa9459Szrj
502*a9fa9459Szrj void
cg_print(Sym ** timesortsym)503*a9fa9459Szrj cg_print (Sym ** timesortsym)
504*a9fa9459Szrj {
505*a9fa9459Szrj unsigned int sym_index;
506*a9fa9459Szrj Sym *parent;
507*a9fa9459Szrj
508*a9fa9459Szrj if (print_descriptions && bsd_style_output)
509*a9fa9459Szrj bsd_callg_blurb (stdout);
510*a9fa9459Szrj
511*a9fa9459Szrj print_header ();
512*a9fa9459Szrj
513*a9fa9459Szrj for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
514*a9fa9459Szrj {
515*a9fa9459Szrj parent = timesortsym[sym_index];
516*a9fa9459Szrj
517*a9fa9459Szrj if ((ignore_zeros && parent->ncalls == 0
518*a9fa9459Szrj && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
519*a9fa9459Szrj && parent->cg.prop.child == 0)
520*a9fa9459Szrj || !parent->cg.print_flag
521*a9fa9459Szrj || (line_granularity && ! parent->is_func))
522*a9fa9459Szrj continue;
523*a9fa9459Szrj
524*a9fa9459Szrj if (!parent->name && parent->cg.cyc.num != 0)
525*a9fa9459Szrj {
526*a9fa9459Szrj /* Cycle header. */
527*a9fa9459Szrj print_cycle (parent);
528*a9fa9459Szrj print_members (parent);
529*a9fa9459Szrj }
530*a9fa9459Szrj else
531*a9fa9459Szrj {
532*a9fa9459Szrj print_parents (parent);
533*a9fa9459Szrj print_line (parent);
534*a9fa9459Szrj print_children (parent);
535*a9fa9459Szrj }
536*a9fa9459Szrj
537*a9fa9459Szrj if (bsd_style_output)
538*a9fa9459Szrj printf ("\n");
539*a9fa9459Szrj
540*a9fa9459Szrj printf ("-----------------------------------------------\n");
541*a9fa9459Szrj
542*a9fa9459Szrj if (bsd_style_output)
543*a9fa9459Szrj printf ("\n");
544*a9fa9459Szrj }
545*a9fa9459Szrj
546*a9fa9459Szrj free (timesortsym);
547*a9fa9459Szrj
548*a9fa9459Szrj if (print_descriptions && !bsd_style_output)
549*a9fa9459Szrj fsf_callg_blurb (stdout);
550*a9fa9459Szrj }
551*a9fa9459Szrj
552*a9fa9459Szrj
553*a9fa9459Szrj static int
cmp_name(const PTR left,const PTR right)554*a9fa9459Szrj cmp_name (const PTR left, const PTR right)
555*a9fa9459Szrj {
556*a9fa9459Szrj const Sym **npp1 = (const Sym **) left;
557*a9fa9459Szrj const Sym **npp2 = (const Sym **) right;
558*a9fa9459Szrj
559*a9fa9459Szrj return strcmp ((*npp1)->name, (*npp2)->name);
560*a9fa9459Szrj }
561*a9fa9459Szrj
562*a9fa9459Szrj
563*a9fa9459Szrj void
cg_print_index(void)564*a9fa9459Szrj cg_print_index (void)
565*a9fa9459Szrj {
566*a9fa9459Szrj unsigned int sym_index;
567*a9fa9459Szrj unsigned int nnames, todo, i, j;
568*a9fa9459Szrj int col, starting_col;
569*a9fa9459Szrj Sym **name_sorted_syms, *sym;
570*a9fa9459Szrj const char *filename;
571*a9fa9459Szrj char buf[20];
572*a9fa9459Szrj int column_width = (output_width - 1) / 3; /* Don't write in last col! */
573*a9fa9459Szrj
574*a9fa9459Szrj /* Now, sort regular function name
575*a9fa9459Szrj alphabetically to create an index. */
576*a9fa9459Szrj name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
577*a9fa9459Szrj
578*a9fa9459Szrj for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
579*a9fa9459Szrj {
580*a9fa9459Szrj if (ignore_zeros && symtab.base[sym_index].ncalls == 0
581*a9fa9459Szrj && symtab.base[sym_index].hist.time == 0)
582*a9fa9459Szrj continue;
583*a9fa9459Szrj
584*a9fa9459Szrj name_sorted_syms[nnames++] = &symtab.base[sym_index];
585*a9fa9459Szrj }
586*a9fa9459Szrj
587*a9fa9459Szrj qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
588*a9fa9459Szrj
589*a9fa9459Szrj for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
590*a9fa9459Szrj name_sorted_syms[todo++] = &cycle_header[sym_index];
591*a9fa9459Szrj
592*a9fa9459Szrj printf ("\f\n");
593*a9fa9459Szrj printf (_("Index by function name\n\n"));
594*a9fa9459Szrj sym_index = (todo + 2) / 3;
595*a9fa9459Szrj
596*a9fa9459Szrj for (i = 0; i < sym_index; i++)
597*a9fa9459Szrj {
598*a9fa9459Szrj col = 0;
599*a9fa9459Szrj starting_col = 0;
600*a9fa9459Szrj
601*a9fa9459Szrj for (j = i; j < todo; j += sym_index)
602*a9fa9459Szrj {
603*a9fa9459Szrj sym = name_sorted_syms[j];
604*a9fa9459Szrj
605*a9fa9459Szrj if (sym->cg.print_flag)
606*a9fa9459Szrj sprintf (buf, "[%d]", sym->cg.index);
607*a9fa9459Szrj else
608*a9fa9459Szrj sprintf (buf, "(%d)", sym->cg.index);
609*a9fa9459Szrj
610*a9fa9459Szrj if (j < nnames)
611*a9fa9459Szrj {
612*a9fa9459Szrj if (bsd_style_output)
613*a9fa9459Szrj {
614*a9fa9459Szrj printf ("%6.6s %-19.19s", buf, sym->name);
615*a9fa9459Szrj }
616*a9fa9459Szrj else
617*a9fa9459Szrj {
618*a9fa9459Szrj col += strlen (buf);
619*a9fa9459Szrj
620*a9fa9459Szrj for (; col < starting_col + 5; ++col)
621*a9fa9459Szrj putchar (' ');
622*a9fa9459Szrj
623*a9fa9459Szrj printf (" %s ", buf);
624*a9fa9459Szrj col += print_name_only (sym);
625*a9fa9459Szrj
626*a9fa9459Szrj if (!line_granularity && sym->is_static && sym->file)
627*a9fa9459Szrj {
628*a9fa9459Szrj filename = sym->file->name;
629*a9fa9459Szrj
630*a9fa9459Szrj if (!print_path)
631*a9fa9459Szrj {
632*a9fa9459Szrj filename = strrchr (filename, '/');
633*a9fa9459Szrj
634*a9fa9459Szrj if (filename)
635*a9fa9459Szrj ++filename;
636*a9fa9459Szrj else
637*a9fa9459Szrj filename = sym->file->name;
638*a9fa9459Szrj }
639*a9fa9459Szrj
640*a9fa9459Szrj printf (" (%s)", filename);
641*a9fa9459Szrj col += strlen (filename) + 3;
642*a9fa9459Szrj }
643*a9fa9459Szrj }
644*a9fa9459Szrj }
645*a9fa9459Szrj else
646*a9fa9459Szrj {
647*a9fa9459Szrj if (bsd_style_output)
648*a9fa9459Szrj {
649*a9fa9459Szrj printf ("%6.6s ", buf);
650*a9fa9459Szrj sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
651*a9fa9459Szrj printf ("%-19.19s", buf);
652*a9fa9459Szrj }
653*a9fa9459Szrj else
654*a9fa9459Szrj {
655*a9fa9459Szrj col += strlen (buf);
656*a9fa9459Szrj for (; col < starting_col + 5; ++col)
657*a9fa9459Szrj putchar (' ');
658*a9fa9459Szrj printf (" %s ", buf);
659*a9fa9459Szrj sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
660*a9fa9459Szrj printf ("%s", buf);
661*a9fa9459Szrj col += strlen (buf);
662*a9fa9459Szrj }
663*a9fa9459Szrj }
664*a9fa9459Szrj
665*a9fa9459Szrj starting_col += column_width;
666*a9fa9459Szrj }
667*a9fa9459Szrj
668*a9fa9459Szrj printf ("\n");
669*a9fa9459Szrj }
670*a9fa9459Szrj
671*a9fa9459Szrj free (name_sorted_syms);
672*a9fa9459Szrj }
673*a9fa9459Szrj
674*a9fa9459Szrj /* Compare two arcs based on their usage counts.
675*a9fa9459Szrj We want to sort in descending order. */
676*a9fa9459Szrj
677*a9fa9459Szrj static int
cmp_arc_count(const PTR left,const PTR right)678*a9fa9459Szrj cmp_arc_count (const PTR left, const PTR right)
679*a9fa9459Szrj {
680*a9fa9459Szrj const Arc **npp1 = (const Arc **) left;
681*a9fa9459Szrj const Arc **npp2 = (const Arc **) right;
682*a9fa9459Szrj
683*a9fa9459Szrj if ((*npp1)->count > (*npp2)->count)
684*a9fa9459Szrj return -1;
685*a9fa9459Szrj else if ((*npp1)->count < (*npp2)->count)
686*a9fa9459Szrj return 1;
687*a9fa9459Szrj else
688*a9fa9459Szrj return 0;
689*a9fa9459Szrj }
690*a9fa9459Szrj
691*a9fa9459Szrj /* Compare two funtions based on their usage counts.
692*a9fa9459Szrj We want to sort in descending order. */
693*a9fa9459Szrj
694*a9fa9459Szrj static int
cmp_fun_nuses(const PTR left,const PTR right)695*a9fa9459Szrj cmp_fun_nuses (const PTR left, const PTR right)
696*a9fa9459Szrj {
697*a9fa9459Szrj const Sym **npp1 = (const Sym **) left;
698*a9fa9459Szrj const Sym **npp2 = (const Sym **) right;
699*a9fa9459Szrj
700*a9fa9459Szrj if ((*npp1)->nuses > (*npp2)->nuses)
701*a9fa9459Szrj return -1;
702*a9fa9459Szrj else if ((*npp1)->nuses < (*npp2)->nuses)
703*a9fa9459Szrj return 1;
704*a9fa9459Szrj else
705*a9fa9459Szrj return 0;
706*a9fa9459Szrj }
707*a9fa9459Szrj
708*a9fa9459Szrj /* Print a suggested function ordering based on the profiling data.
709*a9fa9459Szrj
710*a9fa9459Szrj We perform 4 major steps when ordering functions:
711*a9fa9459Szrj
712*a9fa9459Szrj * Group unused functions together and place them at the
713*a9fa9459Szrj end of the function order.
714*a9fa9459Szrj
715*a9fa9459Szrj * Search the highest use arcs (those which account for 90% of
716*a9fa9459Szrj the total arc count) for functions which have several parents.
717*a9fa9459Szrj
718*a9fa9459Szrj Group those with the most call sites together (currently the
719*a9fa9459Szrj top 1.25% which have at least five different call sites).
720*a9fa9459Szrj
721*a9fa9459Szrj These are emitted at the start of the function order.
722*a9fa9459Szrj
723*a9fa9459Szrj * Use a greedy placement algorithm to place functions which
724*a9fa9459Szrj occur in the top 99% of the arcs in the profile. Some provisions
725*a9fa9459Szrj are made to handle high usage arcs where the parent and/or
726*a9fa9459Szrj child has already been placed.
727*a9fa9459Szrj
728*a9fa9459Szrj * Run the same greedy placement algorithm on the remaining
729*a9fa9459Szrj arcs to place the leftover functions.
730*a9fa9459Szrj
731*a9fa9459Szrj
732*a9fa9459Szrj The various "magic numbers" should (one day) be tuneable by command
733*a9fa9459Szrj line options. They were arrived at by benchmarking a few applications
734*a9fa9459Szrj with various values to see which values produced better overall function
735*a9fa9459Szrj orderings.
736*a9fa9459Szrj
737*a9fa9459Szrj Of course, profiling errors, machine limitations (PA long calls), and
738*a9fa9459Szrj poor cutoff values for the placement algorithm may limit the usefullness
739*a9fa9459Szrj of the resulting function order. Improvements would be greatly appreciated.
740*a9fa9459Szrj
741*a9fa9459Szrj Suggestions:
742*a9fa9459Szrj
743*a9fa9459Szrj * Place the functions with many callers near the middle of the
744*a9fa9459Szrj list to reduce long calls.
745*a9fa9459Szrj
746*a9fa9459Szrj * Propagate arc usage changes as functions are placed. Ie if
747*a9fa9459Szrj func1 and func2 are placed together, arcs to/from those arcs
748*a9fa9459Szrj to the same parent/child should be combined, then resort the
749*a9fa9459Szrj arcs to choose the next one.
750*a9fa9459Szrj
751*a9fa9459Szrj * Implement some global positioning algorithm to place the
752*a9fa9459Szrj chains made by the greedy local positioning algorithm. Probably
753*a9fa9459Szrj by examining arcs which haven't been placed yet to tie two
754*a9fa9459Szrj chains together.
755*a9fa9459Szrj
756*a9fa9459Szrj * Take a function's size and time into account in the algorithm;
757*a9fa9459Szrj size in particular is important on the PA (long calls). Placing
758*a9fa9459Szrj many small functions onto their own page may be wise.
759*a9fa9459Szrj
760*a9fa9459Szrj * Use better profiling information; many published algorithms
761*a9fa9459Szrj are based on call sequences through time, rather than just
762*a9fa9459Szrj arc counts.
763*a9fa9459Szrj
764*a9fa9459Szrj * Prodecure cloning could improve performance when a small number
765*a9fa9459Szrj of arcs account for most of the calls to a particular function.
766*a9fa9459Szrj
767*a9fa9459Szrj * Use relocation information to avoid moving unused functions
768*a9fa9459Szrj completely out of the code stream; this would avoid severe lossage
769*a9fa9459Szrj when the profile data bears little resemblance to actual runs.
770*a9fa9459Szrj
771*a9fa9459Szrj * Propagation of arc usages should also improve .o link line
772*a9fa9459Szrj ordering which shares the same arc placement algorithm with
773*a9fa9459Szrj the function ordering code (in fact it is a degenerate case
774*a9fa9459Szrj of function ordering). */
775*a9fa9459Szrj
776*a9fa9459Szrj void
cg_print_function_ordering(void)777*a9fa9459Szrj cg_print_function_ordering (void)
778*a9fa9459Szrj {
779*a9fa9459Szrj unsigned long sym_index;
780*a9fa9459Szrj unsigned long arc_index;
781*a9fa9459Szrj unsigned long used, unused, scratch_index;
782*a9fa9459Szrj unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
783*a9fa9459Szrj #ifdef __GNUC__
784*a9fa9459Szrj unsigned long long total_arcs, tmp_arcs_count;
785*a9fa9459Szrj #else
786*a9fa9459Szrj unsigned long total_arcs, tmp_arcs_count;
787*a9fa9459Szrj #endif
788*a9fa9459Szrj Sym **unused_syms, **used_syms, **scratch_syms;
789*a9fa9459Szrj Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
790*a9fa9459Szrj
791*a9fa9459Szrj sym_index = 0;
792*a9fa9459Szrj used = 0;
793*a9fa9459Szrj unused = 0;
794*a9fa9459Szrj scratch_index = 0;
795*a9fa9459Szrj unplaced_arc_count = 0;
796*a9fa9459Szrj high_arc_count = 0;
797*a9fa9459Szrj scratch_arc_count = 0;
798*a9fa9459Szrj
799*a9fa9459Szrj /* First group all the unused functions together. */
800*a9fa9459Szrj unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
801*a9fa9459Szrj used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802*a9fa9459Szrj scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803*a9fa9459Szrj high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
804*a9fa9459Szrj scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805*a9fa9459Szrj unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806*a9fa9459Szrj
807*a9fa9459Szrj /* Walk through all the functions; mark those which are never
808*a9fa9459Szrj called as placed (we'll emit them as a group later). */
809*a9fa9459Szrj for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
810*a9fa9459Szrj {
811*a9fa9459Szrj if (symtab.base[sym_index].ncalls == 0)
812*a9fa9459Szrj {
813*a9fa9459Szrj unused_syms[unused++] = &symtab.base[sym_index];
814*a9fa9459Szrj symtab.base[sym_index].has_been_placed = 1;
815*a9fa9459Szrj }
816*a9fa9459Szrj else
817*a9fa9459Szrj {
818*a9fa9459Szrj used_syms[used++] = &symtab.base[sym_index];
819*a9fa9459Szrj symtab.base[sym_index].has_been_placed = 0;
820*a9fa9459Szrj symtab.base[sym_index].next = 0;
821*a9fa9459Szrj symtab.base[sym_index].prev = 0;
822*a9fa9459Szrj symtab.base[sym_index].nuses = 0;
823*a9fa9459Szrj }
824*a9fa9459Szrj }
825*a9fa9459Szrj
826*a9fa9459Szrj /* Sort the arcs from most used to least used. */
827*a9fa9459Szrj qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
828*a9fa9459Szrj
829*a9fa9459Szrj /* Compute the total arc count. Also mark arcs as unplaced.
830*a9fa9459Szrj
831*a9fa9459Szrj Note we don't compensate for overflow if that happens!
832*a9fa9459Szrj Overflow is much less likely when this file is compiled
833*a9fa9459Szrj with GCC as it can double-wide integers via long long. */
834*a9fa9459Szrj total_arcs = 0;
835*a9fa9459Szrj for (arc_index = 0; arc_index < numarcs; arc_index++)
836*a9fa9459Szrj {
837*a9fa9459Szrj total_arcs += arcs[arc_index]->count;
838*a9fa9459Szrj arcs[arc_index]->has_been_placed = 0;
839*a9fa9459Szrj }
840*a9fa9459Szrj
841*a9fa9459Szrj /* We want to pull out those functions which are referenced
842*a9fa9459Szrj by many highly used arcs and emit them as a group. This
843*a9fa9459Szrj could probably use some tuning. */
844*a9fa9459Szrj tmp_arcs_count = 0;
845*a9fa9459Szrj for (arc_index = 0; arc_index < numarcs; arc_index++)
846*a9fa9459Szrj {
847*a9fa9459Szrj tmp_arcs_count += arcs[arc_index]->count;
848*a9fa9459Szrj
849*a9fa9459Szrj /* Count how many times each parent and child are used up
850*a9fa9459Szrj to our threshhold of arcs (90%). */
851*a9fa9459Szrj if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
852*a9fa9459Szrj break;
853*a9fa9459Szrj
854*a9fa9459Szrj arcs[arc_index]->child->nuses++;
855*a9fa9459Szrj }
856*a9fa9459Szrj
857*a9fa9459Szrj /* Now sort a temporary symbol table based on the number of
858*a9fa9459Szrj times each function was used in the highest used arcs. */
859*a9fa9459Szrj memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
860*a9fa9459Szrj qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
861*a9fa9459Szrj
862*a9fa9459Szrj /* Now pick out those symbols we're going to emit as
863*a9fa9459Szrj a group. We take up to 1.25% of the used symbols. */
864*a9fa9459Szrj for (sym_index = 0; sym_index < used / 80; sym_index++)
865*a9fa9459Szrj {
866*a9fa9459Szrj Sym *sym = scratch_syms[sym_index];
867*a9fa9459Szrj Arc *arc;
868*a9fa9459Szrj
869*a9fa9459Szrj /* If we hit symbols that aren't used from many call sites,
870*a9fa9459Szrj then we can quit. We choose five as the low limit for
871*a9fa9459Szrj no particular reason. */
872*a9fa9459Szrj if (sym->nuses == 5)
873*a9fa9459Szrj break;
874*a9fa9459Szrj
875*a9fa9459Szrj /* We're going to need the arcs between these functions.
876*a9fa9459Szrj Unfortunately, we don't know all these functions
877*a9fa9459Szrj until we're done. So we keep track of all the arcs
878*a9fa9459Szrj to the functions we care about, then prune out those
879*a9fa9459Szrj which are uninteresting.
880*a9fa9459Szrj
881*a9fa9459Szrj An interesting variation would be to quit when we found
882*a9fa9459Szrj multi-call site functions which account for some percentage
883*a9fa9459Szrj of the arcs. */
884*a9fa9459Szrj arc = sym->cg.children;
885*a9fa9459Szrj
886*a9fa9459Szrj while (arc)
887*a9fa9459Szrj {
888*a9fa9459Szrj if (arc->parent != arc->child)
889*a9fa9459Szrj scratch_arcs[scratch_arc_count++] = arc;
890*a9fa9459Szrj arc->has_been_placed = 1;
891*a9fa9459Szrj arc = arc->next_child;
892*a9fa9459Szrj }
893*a9fa9459Szrj
894*a9fa9459Szrj arc = sym->cg.parents;
895*a9fa9459Szrj
896*a9fa9459Szrj while (arc)
897*a9fa9459Szrj {
898*a9fa9459Szrj if (arc->parent != arc->child)
899*a9fa9459Szrj scratch_arcs[scratch_arc_count++] = arc;
900*a9fa9459Szrj arc->has_been_placed = 1;
901*a9fa9459Szrj arc = arc->next_parent;
902*a9fa9459Szrj }
903*a9fa9459Szrj
904*a9fa9459Szrj /* Keep track of how many symbols we're going to place. */
905*a9fa9459Szrj scratch_index = sym_index;
906*a9fa9459Szrj
907*a9fa9459Szrj /* A lie, but it makes identifying
908*a9fa9459Szrj these functions easier later. */
909*a9fa9459Szrj sym->has_been_placed = 1;
910*a9fa9459Szrj }
911*a9fa9459Szrj
912*a9fa9459Szrj /* Now walk through the temporary arcs and copy
913*a9fa9459Szrj those we care about into the high arcs array. */
914*a9fa9459Szrj for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
915*a9fa9459Szrj {
916*a9fa9459Szrj Arc *arc = scratch_arcs[arc_index];
917*a9fa9459Szrj
918*a9fa9459Szrj /* If this arc refers to highly used functions, then
919*a9fa9459Szrj then we want to keep it. */
920*a9fa9459Szrj if (arc->child->has_been_placed
921*a9fa9459Szrj && arc->parent->has_been_placed)
922*a9fa9459Szrj {
923*a9fa9459Szrj high_arcs[high_arc_count++] = scratch_arcs[arc_index];
924*a9fa9459Szrj
925*a9fa9459Szrj /* We need to turn of has_been_placed since we're going to
926*a9fa9459Szrj use the main arc placement algorithm on these arcs. */
927*a9fa9459Szrj arc->child->has_been_placed = 0;
928*a9fa9459Szrj arc->parent->has_been_placed = 0;
929*a9fa9459Szrj }
930*a9fa9459Szrj }
931*a9fa9459Szrj
932*a9fa9459Szrj /* Dump the multi-site high usage functions which are not
933*a9fa9459Szrj going to be ordered by the main ordering algorithm. */
934*a9fa9459Szrj for (sym_index = 0; sym_index < scratch_index; sym_index++)
935*a9fa9459Szrj {
936*a9fa9459Szrj if (scratch_syms[sym_index]->has_been_placed)
937*a9fa9459Szrj printf ("%s\n", scratch_syms[sym_index]->name);
938*a9fa9459Szrj }
939*a9fa9459Szrj
940*a9fa9459Szrj /* Now we can order the multi-site high use
941*a9fa9459Szrj functions based on the arcs between them. */
942*a9fa9459Szrj qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
943*a9fa9459Szrj order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
944*a9fa9459Szrj unplaced_arcs, &unplaced_arc_count);
945*a9fa9459Szrj
946*a9fa9459Szrj /* Order and dump the high use functions left,
947*a9fa9459Szrj these typically have only a few call sites. */
948*a9fa9459Szrj order_and_dump_functions_by_arcs (arcs, numarcs, 0,
949*a9fa9459Szrj unplaced_arcs, &unplaced_arc_count);
950*a9fa9459Szrj
951*a9fa9459Szrj /* Now place the rarely used functions. */
952*a9fa9459Szrj order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
953*a9fa9459Szrj scratch_arcs, &scratch_arc_count);
954*a9fa9459Szrj
955*a9fa9459Szrj /* Output any functions not emitted by the order_and_dump calls. */
956*a9fa9459Szrj for (sym_index = 0; sym_index < used; sym_index++)
957*a9fa9459Szrj if (used_syms[sym_index]->has_been_placed == 0)
958*a9fa9459Szrj printf("%s\n", used_syms[sym_index]->name);
959*a9fa9459Szrj
960*a9fa9459Szrj /* Output the unused functions. */
961*a9fa9459Szrj for (sym_index = 0; sym_index < unused; sym_index++)
962*a9fa9459Szrj printf("%s\n", unused_syms[sym_index]->name);
963*a9fa9459Szrj
964*a9fa9459Szrj unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
965*a9fa9459Szrj used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966*a9fa9459Szrj scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967*a9fa9459Szrj high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
968*a9fa9459Szrj scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969*a9fa9459Szrj unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970*a9fa9459Szrj
971*a9fa9459Szrj free (unused_syms);
972*a9fa9459Szrj free (used_syms);
973*a9fa9459Szrj free (scratch_syms);
974*a9fa9459Szrj free (high_arcs);
975*a9fa9459Szrj free (scratch_arcs);
976*a9fa9459Szrj free (unplaced_arcs);
977*a9fa9459Szrj }
978*a9fa9459Szrj
979*a9fa9459Szrj /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
980*a9fa9459Szrj place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
981*a9fa9459Szrj
982*a9fa9459Szrj If ALL is nonzero, then place all functions referenced by THE_ARCS,
983*a9fa9459Szrj else only place those referenced in the top 99% of the arcs in THE_ARCS. */
984*a9fa9459Szrj
985*a9fa9459Szrj #define MOST 0.99
986*a9fa9459Szrj static void
order_and_dump_functions_by_arcs(Arc ** the_arcs,unsigned long arc_count,int all,Arc ** unplaced_arcs,unsigned long * unplaced_arc_count)987*a9fa9459Szrj order_and_dump_functions_by_arcs (Arc **the_arcs, unsigned long arc_count,
988*a9fa9459Szrj int all, Arc **unplaced_arcs,
989*a9fa9459Szrj unsigned long *unplaced_arc_count)
990*a9fa9459Szrj {
991*a9fa9459Szrj #ifdef __GNUC__
992*a9fa9459Szrj unsigned long long tmp_arcs, total_arcs;
993*a9fa9459Szrj #else
994*a9fa9459Szrj unsigned long tmp_arcs, total_arcs;
995*a9fa9459Szrj #endif
996*a9fa9459Szrj unsigned int arc_index;
997*a9fa9459Szrj
998*a9fa9459Szrj /* If needed, compute the total arc count.
999*a9fa9459Szrj
1000*a9fa9459Szrj Note we don't compensate for overflow if that happens! */
1001*a9fa9459Szrj if (! all)
1002*a9fa9459Szrj {
1003*a9fa9459Szrj total_arcs = 0;
1004*a9fa9459Szrj for (arc_index = 0; arc_index < arc_count; arc_index++)
1005*a9fa9459Szrj total_arcs += the_arcs[arc_index]->count;
1006*a9fa9459Szrj }
1007*a9fa9459Szrj else
1008*a9fa9459Szrj total_arcs = 0;
1009*a9fa9459Szrj
1010*a9fa9459Szrj tmp_arcs = 0;
1011*a9fa9459Szrj
1012*a9fa9459Szrj for (arc_index = 0; arc_index < arc_count; arc_index++)
1013*a9fa9459Szrj {
1014*a9fa9459Szrj Sym *sym1, *sym2;
1015*a9fa9459Szrj Sym *child, *parent;
1016*a9fa9459Szrj
1017*a9fa9459Szrj tmp_arcs += the_arcs[arc_index]->count;
1018*a9fa9459Szrj
1019*a9fa9459Szrj /* Ignore this arc if it's already been placed. */
1020*a9fa9459Szrj if (the_arcs[arc_index]->has_been_placed)
1021*a9fa9459Szrj continue;
1022*a9fa9459Szrj
1023*a9fa9459Szrj child = the_arcs[arc_index]->child;
1024*a9fa9459Szrj parent = the_arcs[arc_index]->parent;
1025*a9fa9459Szrj
1026*a9fa9459Szrj /* If we're not using all arcs, and this is a rarely used
1027*a9fa9459Szrj arc, then put it on the unplaced_arc list. Similarly
1028*a9fa9459Szrj if both the parent and child of this arc have been placed. */
1029*a9fa9459Szrj if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1030*a9fa9459Szrj || child->has_been_placed || parent->has_been_placed)
1031*a9fa9459Szrj {
1032*a9fa9459Szrj unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1033*a9fa9459Szrj continue;
1034*a9fa9459Szrj }
1035*a9fa9459Szrj
1036*a9fa9459Szrj /* If all slots in the parent and child are full, then there isn't
1037*a9fa9459Szrj anything we can do right now. We'll place this arc on the
1038*a9fa9459Szrj unplaced arc list in the hope that a global positioning
1039*a9fa9459Szrj algorithm can use it to place function chains. */
1040*a9fa9459Szrj if (parent->next && parent->prev && child->next && child->prev)
1041*a9fa9459Szrj {
1042*a9fa9459Szrj unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1043*a9fa9459Szrj continue;
1044*a9fa9459Szrj }
1045*a9fa9459Szrj
1046*a9fa9459Szrj /* If the parent is unattached, then find the closest
1047*a9fa9459Szrj place to attach it onto child's chain. Similarly
1048*a9fa9459Szrj for the opposite case. */
1049*a9fa9459Szrj if (!parent->next && !parent->prev)
1050*a9fa9459Szrj {
1051*a9fa9459Szrj int next_count = 0;
1052*a9fa9459Szrj int prev_count = 0;
1053*a9fa9459Szrj Sym *prev = child;
1054*a9fa9459Szrj Sym *next = child;
1055*a9fa9459Szrj
1056*a9fa9459Szrj /* Walk to the beginning and end of the child's chain. */
1057*a9fa9459Szrj while (next->next)
1058*a9fa9459Szrj {
1059*a9fa9459Szrj next = next->next;
1060*a9fa9459Szrj next_count++;
1061*a9fa9459Szrj }
1062*a9fa9459Szrj
1063*a9fa9459Szrj while (prev->prev)
1064*a9fa9459Szrj {
1065*a9fa9459Szrj prev = prev->prev;
1066*a9fa9459Szrj prev_count++;
1067*a9fa9459Szrj }
1068*a9fa9459Szrj
1069*a9fa9459Szrj /* Choose the closest. */
1070*a9fa9459Szrj child = next_count < prev_count ? next : prev;
1071*a9fa9459Szrj }
1072*a9fa9459Szrj else if (! child->next && !child->prev)
1073*a9fa9459Szrj {
1074*a9fa9459Szrj int next_count = 0;
1075*a9fa9459Szrj int prev_count = 0;
1076*a9fa9459Szrj Sym *prev = parent;
1077*a9fa9459Szrj Sym *next = parent;
1078*a9fa9459Szrj
1079*a9fa9459Szrj while (next->next)
1080*a9fa9459Szrj {
1081*a9fa9459Szrj next = next->next;
1082*a9fa9459Szrj next_count++;
1083*a9fa9459Szrj }
1084*a9fa9459Szrj
1085*a9fa9459Szrj while (prev->prev)
1086*a9fa9459Szrj {
1087*a9fa9459Szrj prev = prev->prev;
1088*a9fa9459Szrj prev_count++;
1089*a9fa9459Szrj }
1090*a9fa9459Szrj
1091*a9fa9459Szrj parent = prev_count < next_count ? prev : next;
1092*a9fa9459Szrj }
1093*a9fa9459Szrj else
1094*a9fa9459Szrj {
1095*a9fa9459Szrj /* Couldn't find anywhere to attach the functions,
1096*a9fa9459Szrj put the arc on the unplaced arc list. */
1097*a9fa9459Szrj unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1098*a9fa9459Szrj continue;
1099*a9fa9459Szrj }
1100*a9fa9459Szrj
1101*a9fa9459Szrj /* Make sure we don't tie two ends together. */
1102*a9fa9459Szrj sym1 = parent;
1103*a9fa9459Szrj if (sym1->next)
1104*a9fa9459Szrj while (sym1->next)
1105*a9fa9459Szrj sym1 = sym1->next;
1106*a9fa9459Szrj else
1107*a9fa9459Szrj while (sym1->prev)
1108*a9fa9459Szrj sym1 = sym1->prev;
1109*a9fa9459Szrj
1110*a9fa9459Szrj sym2 = child;
1111*a9fa9459Szrj if (sym2->next)
1112*a9fa9459Szrj while (sym2->next)
1113*a9fa9459Szrj sym2 = sym2->next;
1114*a9fa9459Szrj else
1115*a9fa9459Szrj while (sym2->prev)
1116*a9fa9459Szrj sym2 = sym2->prev;
1117*a9fa9459Szrj
1118*a9fa9459Szrj if (sym1 == child
1119*a9fa9459Szrj && sym2 == parent)
1120*a9fa9459Szrj {
1121*a9fa9459Szrj /* This would tie two ends together. */
1122*a9fa9459Szrj unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1123*a9fa9459Szrj continue;
1124*a9fa9459Szrj }
1125*a9fa9459Szrj
1126*a9fa9459Szrj if (parent->next)
1127*a9fa9459Szrj {
1128*a9fa9459Szrj /* Must attach to the parent's prev field. */
1129*a9fa9459Szrj if (! child->next)
1130*a9fa9459Szrj {
1131*a9fa9459Szrj /* parent-prev and child-next */
1132*a9fa9459Szrj parent->prev = child;
1133*a9fa9459Szrj child->next = parent;
1134*a9fa9459Szrj the_arcs[arc_index]->has_been_placed = 1;
1135*a9fa9459Szrj }
1136*a9fa9459Szrj }
1137*a9fa9459Szrj else if (parent->prev)
1138*a9fa9459Szrj {
1139*a9fa9459Szrj /* Must attach to the parent's next field. */
1140*a9fa9459Szrj if (! child->prev)
1141*a9fa9459Szrj {
1142*a9fa9459Szrj /* parent-next and child-prev */
1143*a9fa9459Szrj parent->next = child;
1144*a9fa9459Szrj child->prev = parent;
1145*a9fa9459Szrj the_arcs[arc_index]->has_been_placed = 1;
1146*a9fa9459Szrj }
1147*a9fa9459Szrj }
1148*a9fa9459Szrj else
1149*a9fa9459Szrj {
1150*a9fa9459Szrj /* Can attach to either field in the parent, depends
1151*a9fa9459Szrj on where we've got space in the child. */
1152*a9fa9459Szrj if (child->prev)
1153*a9fa9459Szrj {
1154*a9fa9459Szrj /* parent-prev and child-next. */
1155*a9fa9459Szrj parent->prev = child;
1156*a9fa9459Szrj child->next = parent;
1157*a9fa9459Szrj the_arcs[arc_index]->has_been_placed = 1;
1158*a9fa9459Szrj }
1159*a9fa9459Szrj else
1160*a9fa9459Szrj {
1161*a9fa9459Szrj /* parent-next and child-prev. */
1162*a9fa9459Szrj parent->next = child;
1163*a9fa9459Szrj child->prev = parent;
1164*a9fa9459Szrj the_arcs[arc_index]->has_been_placed = 1;
1165*a9fa9459Szrj }
1166*a9fa9459Szrj }
1167*a9fa9459Szrj }
1168*a9fa9459Szrj
1169*a9fa9459Szrj /* Dump the chains of functions we've made. */
1170*a9fa9459Szrj for (arc_index = 0; arc_index < arc_count; arc_index++)
1171*a9fa9459Szrj {
1172*a9fa9459Szrj Sym *sym;
1173*a9fa9459Szrj if (the_arcs[arc_index]->parent->has_been_placed
1174*a9fa9459Szrj || the_arcs[arc_index]->child->has_been_placed)
1175*a9fa9459Szrj continue;
1176*a9fa9459Szrj
1177*a9fa9459Szrj sym = the_arcs[arc_index]->parent;
1178*a9fa9459Szrj
1179*a9fa9459Szrj /* If this symbol isn't attached to any other
1180*a9fa9459Szrj symbols, then we've got a rarely used arc.
1181*a9fa9459Szrj
1182*a9fa9459Szrj Skip it for now, we'll deal with them later. */
1183*a9fa9459Szrj if (sym->next == NULL
1184*a9fa9459Szrj && sym->prev == NULL)
1185*a9fa9459Szrj continue;
1186*a9fa9459Szrj
1187*a9fa9459Szrj /* Get to the start of this chain. */
1188*a9fa9459Szrj while (sym->prev)
1189*a9fa9459Szrj sym = sym->prev;
1190*a9fa9459Szrj
1191*a9fa9459Szrj while (sym)
1192*a9fa9459Szrj {
1193*a9fa9459Szrj /* Mark it as placed. */
1194*a9fa9459Szrj sym->has_been_placed = 1;
1195*a9fa9459Szrj printf ("%s\n", sym->name);
1196*a9fa9459Szrj sym = sym->next;
1197*a9fa9459Szrj }
1198*a9fa9459Szrj }
1199*a9fa9459Szrj
1200*a9fa9459Szrj /* If we want to place all the arcs, then output
1201*a9fa9459Szrj those which weren't placed by the main algorithm. */
1202*a9fa9459Szrj if (all)
1203*a9fa9459Szrj for (arc_index = 0; arc_index < arc_count; arc_index++)
1204*a9fa9459Szrj {
1205*a9fa9459Szrj Sym *sym;
1206*a9fa9459Szrj if (the_arcs[arc_index]->parent->has_been_placed
1207*a9fa9459Szrj || the_arcs[arc_index]->child->has_been_placed)
1208*a9fa9459Szrj continue;
1209*a9fa9459Szrj
1210*a9fa9459Szrj sym = the_arcs[arc_index]->parent;
1211*a9fa9459Szrj
1212*a9fa9459Szrj sym->has_been_placed = 1;
1213*a9fa9459Szrj printf ("%s\n", sym->name);
1214*a9fa9459Szrj }
1215*a9fa9459Szrj }
1216*a9fa9459Szrj
1217*a9fa9459Szrj /* Compare two function_map structs based on file name.
1218*a9fa9459Szrj We want to sort in ascending order. */
1219*a9fa9459Szrj
1220*a9fa9459Szrj static int
cmp_symbol_map(const void * l,const void * r)1221*a9fa9459Szrj cmp_symbol_map (const void * l, const void * r)
1222*a9fa9459Szrj {
1223*a9fa9459Szrj return filename_cmp (((struct function_map *) l)->file_name,
1224*a9fa9459Szrj ((struct function_map *) r)->file_name);
1225*a9fa9459Szrj }
1226*a9fa9459Szrj
1227*a9fa9459Szrj /* Print a suggested .o ordering for files on a link line based
1228*a9fa9459Szrj on profiling information. This uses the function placement
1229*a9fa9459Szrj code for the bulk of its work. */
1230*a9fa9459Szrj
1231*a9fa9459Szrj void
cg_print_file_ordering(void)1232*a9fa9459Szrj cg_print_file_ordering (void)
1233*a9fa9459Szrj {
1234*a9fa9459Szrj unsigned long scratch_arc_count;
1235*a9fa9459Szrj unsigned long arc_index;
1236*a9fa9459Szrj unsigned long sym_index;
1237*a9fa9459Szrj Arc **scratch_arcs;
1238*a9fa9459Szrj char *last;
1239*a9fa9459Szrj
1240*a9fa9459Szrj scratch_arc_count = 0;
1241*a9fa9459Szrj
1242*a9fa9459Szrj scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1243*a9fa9459Szrj for (arc_index = 0; arc_index < numarcs; arc_index++)
1244*a9fa9459Szrj {
1245*a9fa9459Szrj if (! arcs[arc_index]->parent->mapped
1246*a9fa9459Szrj || ! arcs[arc_index]->child->mapped)
1247*a9fa9459Szrj arcs[arc_index]->has_been_placed = 1;
1248*a9fa9459Szrj }
1249*a9fa9459Szrj
1250*a9fa9459Szrj order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1251*a9fa9459Szrj scratch_arcs, &scratch_arc_count);
1252*a9fa9459Szrj
1253*a9fa9459Szrj /* Output .o's not handled by the main placement algorithm. */
1254*a9fa9459Szrj for (sym_index = 0; sym_index < symtab.len; sym_index++)
1255*a9fa9459Szrj {
1256*a9fa9459Szrj if (symtab.base[sym_index].mapped
1257*a9fa9459Szrj && ! symtab.base[sym_index].has_been_placed)
1258*a9fa9459Szrj printf ("%s\n", symtab.base[sym_index].name);
1259*a9fa9459Szrj }
1260*a9fa9459Szrj
1261*a9fa9459Szrj qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1262*a9fa9459Szrj
1263*a9fa9459Szrj /* Now output any .o's that didn't have any text symbols. */
1264*a9fa9459Szrj last = NULL;
1265*a9fa9459Szrj for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1266*a9fa9459Szrj {
1267*a9fa9459Szrj unsigned int index2;
1268*a9fa9459Szrj
1269*a9fa9459Szrj /* Don't bother searching if this symbol
1270*a9fa9459Szrj is the same as the previous one. */
1271*a9fa9459Szrj if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1272*a9fa9459Szrj continue;
1273*a9fa9459Szrj
1274*a9fa9459Szrj for (index2 = 0; index2 < symtab.len; index2++)
1275*a9fa9459Szrj {
1276*a9fa9459Szrj if (! symtab.base[index2].mapped)
1277*a9fa9459Szrj continue;
1278*a9fa9459Szrj
1279*a9fa9459Szrj if (!filename_cmp (symtab.base[index2].name,
1280*a9fa9459Szrj symbol_map[sym_index].file_name))
1281*a9fa9459Szrj break;
1282*a9fa9459Szrj }
1283*a9fa9459Szrj
1284*a9fa9459Szrj /* If we didn't find it in the symbol table, then it must
1285*a9fa9459Szrj be a .o with no text symbols. Output it last. */
1286*a9fa9459Szrj if (index2 == symtab.len)
1287*a9fa9459Szrj printf ("%s\n", symbol_map[sym_index].file_name);
1288*a9fa9459Szrj last = symbol_map[sym_index].file_name;
1289*a9fa9459Szrj }
1290*a9fa9459Szrj }
1291