xref: /netbsd-src/external/gpl3/binutils.old/dist/gprof/cg_print.c (revision 92e958de60c71aa0f2452bd7074cbb006fe6546b)
1 /* cg_print.c -  Print routines for displaying call graphs.
2 
3    Copyright 2000, 2001, 2002, 2004, 2007, 2009, 2011
4    Free Software Foundation, Inc.
5 
6    This file is part of GNU Binutils.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 3 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21    02110-1301, USA.  */
22 
23 #include "gprof.h"
24 #include "libiberty.h"
25 #include "filenames.h"
26 #include "search_list.h"
27 #include "source.h"
28 #include "symtab.h"
29 #include "cg_arcs.h"
30 #include "cg_print.h"
31 #include "hist.h"
32 #include "utils.h"
33 #include "corefile.h"
34 
35 /* Return value of comparison functions used to sort tables.  */
36 #define	LESSTHAN	-1
37 #define	EQUALTO		0
38 #define	GREATERTHAN	1
39 
40 static void print_header (void);
41 static void print_cycle (Sym *);
42 static int cmp_member (Sym *, Sym *);
43 static void sort_members (Sym *);
44 static void print_members (Sym *);
45 static int cmp_arc (Arc *, Arc *);
46 static void sort_parents (Sym *);
47 static void print_parents (Sym *);
48 static void sort_children (Sym *);
49 static void print_children (Sym *);
50 static void print_line (Sym *);
51 static int cmp_name (const PTR, const PTR);
52 static int cmp_arc_count (const PTR, const PTR);
53 static int cmp_fun_nuses (const PTR, const PTR);
54 static void order_and_dump_functions_by_arcs
55   (Arc **, unsigned long, int, Arc **, unsigned long *);
56 
57 /* Declarations of automatically generated functions to output blurbs.  */
58 extern void bsd_callg_blurb (FILE * fp);
59 extern void fsf_callg_blurb (FILE * fp);
60 
61 double print_time = 0.0;
62 
63 
64 static void
65 print_header ()
66 {
67   if (first_output)
68     first_output = FALSE;
69   else
70     printf ("\f\n");
71 
72   if (!bsd_style_output)
73     {
74       if (print_descriptions)
75 	printf (_("\t\t     Call graph (explanation follows)\n\n"));
76       else
77 	printf (_("\t\t\tCall graph\n\n"));
78     }
79 
80   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
81 	  (long) hist_scale * (long) sizeof (UNIT));
82 
83   if (print_time > 0.0)
84     printf (_(" for %.2f%% of %.2f seconds\n\n"),
85 	    100.0 / print_time, print_time / hz);
86   else
87     {
88       printf (_(" no time propagated\n\n"));
89 
90       /* This doesn't hurt, since all the numerators will be 0.0.  */
91       print_time = 1.0;
92     }
93 
94   if (bsd_style_output)
95     {
96       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
97 	      "", "", "", "", _("called"), _("total"), _("parents"));
98       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
99 	      _("index"),
100 	      /* xgettext:no-c-format */
101 	      _("%time"),
102 	      _("self"), _("descendants"), _("called"), _("self"),
103 	      _("name"), _("index"));
104       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
105 	      "", "", "", "", _("called"), _("total"), _("children"));
106       printf ("\n");
107     }
108   else
109     {
110       printf (_("index %% time    self  children    called     name\n"));
111     }
112 }
113 
114 /* Print a cycle header.  */
115 
116 static void
117 print_cycle (Sym *cyc)
118 {
119   char buf[BUFSIZ];
120 
121   sprintf (buf, "[%d]", cyc->cg.index);
122   printf (bsd_style_output
123 	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
124 	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
125 	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
126 	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
127 
128   if (cyc->cg.self_calls != 0)
129     printf ("+%-7lu", cyc->cg.self_calls);
130   else
131     printf (" %7.7s", "");
132 
133   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
134 }
135 
136 /* Compare LEFT and RIGHT membmer.  Major comparison key is
137    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
138 
139 static int
140 cmp_member (Sym *left, Sym *right)
141 {
142   double left_time = left->cg.prop.self + left->cg.prop.child;
143   double right_time = right->cg.prop.self + right->cg.prop.child;
144   unsigned long left_calls = left->ncalls + left->cg.self_calls;
145   unsigned long right_calls = right->ncalls + right->cg.self_calls;
146 
147   if (left_time > right_time)
148     return GREATERTHAN;
149 
150   if (left_time < right_time)
151     return LESSTHAN;
152 
153   if (left_calls > right_calls)
154     return GREATERTHAN;
155 
156   if (left_calls < right_calls)
157     return LESSTHAN;
158 
159   return EQUALTO;
160 }
161 
162 /* Sort members of a cycle.  */
163 
164 static void
165 sort_members (Sym *cyc)
166 {
167   Sym *todo, *doing, *prev;
168 
169   /* Detach cycle members from cyclehead,
170      and insertion sort them back on.  */
171   todo = cyc->cg.cyc.next;
172   cyc->cg.cyc.next = 0;
173 
174   for (doing = todo; doing != NULL; doing = todo)
175     {
176       todo = doing->cg.cyc.next;
177 
178       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
179 	{
180 	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
181 	    break;
182 	}
183 
184       doing->cg.cyc.next = prev->cg.cyc.next;
185       prev->cg.cyc.next = doing;
186     }
187 }
188 
189 /* Print the members of a cycle.  */
190 
191 static void
192 print_members (Sym *cyc)
193 {
194   Sym *member;
195 
196   sort_members (cyc);
197 
198   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
199     {
200       printf (bsd_style_output
201 	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204 	      member->ncalls);
205 
206       if (member->cg.self_calls != 0)
207 	printf ("+%-7lu", member->cg.self_calls);
208       else
209 	printf (" %7.7s", "");
210 
211       printf ("     ");
212       print_name (member);
213       printf ("\n");
214     }
215 }
216 
217 /* Compare two arcs to/from the same child/parent.
218 	- if one arc is a self arc, it's least.
219 	- if one arc is within a cycle, it's less than.
220 	- if both arcs are within a cycle, compare arc counts.
221 	- if neither arc is within a cycle, compare with
222 		time + child_time as major key
223 		arc count as minor key.  */
224 
225 static int
226 cmp_arc (Arc *left, Arc *right)
227 {
228   Sym *left_parent = left->parent;
229   Sym *left_child = left->child;
230   Sym *right_parent = right->parent;
231   Sym *right_child = right->child;
232   double left_time, right_time;
233 
234   DBG (TIMEDEBUG,
235        printf ("[cmp_arc] ");
236        print_name (left_parent);
237        printf (" calls ");
238        print_name (left_child);
239        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
240 	       left->count, left_child->ncalls);
241        printf ("[cmp_arc] ");
242        print_name (right_parent);
243        printf (" calls ");
244        print_name (right_child);
245        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
246 	       right->count, right_child->ncalls);
247        printf ("\n");
248     );
249 
250   if (left_parent == left_child)
251     return LESSTHAN;		/* Left is a self call.  */
252 
253   if (right_parent == right_child)
254     return GREATERTHAN;		/* Right is a self call.  */
255 
256   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
257       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
258     {
259       /* Left is a call within a cycle.  */
260       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
261 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
262 	{
263 	  /* Right is a call within the cycle, too.  */
264 	  if (left->count < right->count)
265 	    return LESSTHAN;
266 
267 	  if (left->count > right->count)
268 	    return GREATERTHAN;
269 
270 	  return EQUALTO;
271 	}
272       else
273 	{
274 	  /* Right isn't a call within the cycle.  */
275 	  return LESSTHAN;
276 	}
277     }
278   else
279     {
280       /* Left isn't a call within a cycle.  */
281       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
282 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
283 	{
284 	  /* Right is a call within a cycle.  */
285 	  return GREATERTHAN;
286 	}
287       else
288 	{
289 	  /* Neither is a call within a cycle.  */
290 	  left_time = left->time + left->child_time;
291 	  right_time = right->time + right->child_time;
292 
293 	  if (left_time < right_time)
294 	    return LESSTHAN;
295 
296 	  if (left_time > right_time)
297 	    return GREATERTHAN;
298 
299 	  if (left->count < right->count)
300 	    return LESSTHAN;
301 
302 	  if (left->count > right->count)
303 	    return GREATERTHAN;
304 
305 	  return EQUALTO;
306 	}
307     }
308 }
309 
310 
311 static void
312 sort_parents (Sym * child)
313 {
314   Arc *arc, *detached, sorted, *prev;
315 
316   /* Unlink parents from child, then insertion sort back on to
317      sorted's parents.
318 	  *arc        the arc you have detached and are inserting.
319 	  *detached   the rest of the arcs to be sorted.
320 	  sorted      arc list onto which you insertion sort.
321 	  *prev       arc before the arc you are comparing.  */
322   sorted.next_parent = 0;
323 
324   for (arc = child->cg.parents; arc; arc = detached)
325     {
326       detached = arc->next_parent;
327 
328       /* Consider *arc as disconnected; insert it into sorted.  */
329       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
330 	{
331 	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
332 	    break;
333 	}
334 
335       arc->next_parent = prev->next_parent;
336       prev->next_parent = arc;
337     }
338 
339   /* Reattach sorted arcs to child.  */
340   child->cg.parents = sorted.next_parent;
341 }
342 
343 
344 static void
345 print_parents (Sym *child)
346 {
347   Sym *parent;
348   Arc *arc;
349   Sym *cycle_head;
350 
351   if (child->cg.cyc.head != 0)
352     cycle_head = child->cg.cyc.head;
353   else
354     cycle_head = child;
355 
356   if (!child->cg.parents)
357     {
358       printf (bsd_style_output
359 	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
360 	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
361 	      "", "", "", "", "", "");
362       return;
363     }
364 
365   sort_parents (child);
366 
367   for (arc = child->cg.parents; arc; arc = arc->next_parent)
368     {
369       parent = arc->parent;
370       if (child == parent || (child->cg.cyc.num != 0
371 			      && parent->cg.cyc.num == child->cg.cyc.num))
372 	{
373 	  /* Selfcall or call among siblings.  */
374 	  printf (bsd_style_output
375 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
376 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
377 		  "", "", "", "",
378 		  arc->count, "");
379 	  print_name (parent);
380 	  printf ("\n");
381 	}
382       else
383 	{
384 	  /* Regular parent of child.  */
385 	  printf (bsd_style_output
386 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
387 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
388 		  "", "",
389 		  arc->time / hz, arc->child_time / hz,
390 		  arc->count, cycle_head->ncalls);
391 	  print_name (parent);
392 	  printf ("\n");
393 	}
394     }
395 }
396 
397 
398 static void
399 sort_children (Sym *parent)
400 {
401   Arc *arc, *detached, sorted, *prev;
402 
403   /* Unlink children from parent, then insertion sort back on to
404      sorted's children.
405 	  *arc        the arc you have detached and are inserting.
406 	  *detached   the rest of the arcs to be sorted.
407 	  sorted      arc list onto which you insertion sort.
408 	  *prev       arc before the arc you are comparing.  */
409   sorted.next_child = 0;
410 
411   for (arc = parent->cg.children; arc; arc = detached)
412     {
413       detached = arc->next_child;
414 
415       /* Consider *arc as disconnected; insert it into sorted.  */
416       for (prev = &sorted; prev->next_child; prev = prev->next_child)
417 	{
418 	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
419 	    break;
420 	}
421 
422       arc->next_child = prev->next_child;
423       prev->next_child = arc;
424     }
425 
426   /* Reattach sorted children to parent.  */
427   parent->cg.children = sorted.next_child;
428 }
429 
430 
431 static void
432 print_children (Sym *parent)
433 {
434   Sym *child;
435   Arc *arc;
436 
437   sort_children (parent);
438   arc = parent->cg.children;
439 
440   for (arc = parent->cg.children; arc; arc = arc->next_child)
441     {
442       child = arc->child;
443       if (child == parent || (child->cg.cyc.num != 0
444 			      && child->cg.cyc.num == parent->cg.cyc.num))
445 	{
446 	  /* Self call or call to sibling.  */
447 	  printf (bsd_style_output
448 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
449 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
450 		  "", "", "", "", arc->count, "");
451 	  print_name (child);
452 	  printf ("\n");
453 	}
454       else
455 	{
456 	  /* Regular child of parent.  */
457 	  printf (bsd_style_output
458 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
459 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
460 		  "", "",
461 		  arc->time / hz, arc->child_time / hz,
462 		  arc->count, child->cg.cyc.head->ncalls);
463 	  print_name (child);
464 	  printf ("\n");
465 	}
466     }
467 }
468 
469 
470 static void
471 print_line (Sym *np)
472 {
473   char buf[BUFSIZ];
474 
475   sprintf (buf, "[%d]", np->cg.index);
476   printf (bsd_style_output
477 	  ? "%-6.6s %5.1f %7.2f %11.2f"
478 	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
479 	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
480 	  np->cg.prop.self / hz, np->cg.prop.child / hz);
481 
482   if ((np->ncalls + np->cg.self_calls) != 0)
483     {
484       printf (" %7lu", np->ncalls);
485 
486       if (np->cg.self_calls != 0)
487 	  printf ("+%-7lu ", np->cg.self_calls);
488       else
489 	  printf (" %7.7s ", "");
490     }
491   else
492     {
493       printf (" %7.7s %7.7s ", "", "");
494     }
495 
496   print_name (np);
497   printf ("\n");
498 }
499 
500 
501 /* Print dynamic call graph.  */
502 
503 void
504 cg_print (Sym ** timesortsym)
505 {
506   unsigned int sym_index;
507   Sym *parent;
508 
509   if (print_descriptions && bsd_style_output)
510     bsd_callg_blurb (stdout);
511 
512   print_header ();
513 
514   for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
515     {
516       parent = timesortsym[sym_index];
517 
518       if ((ignore_zeros && parent->ncalls == 0
519 	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
520 	   && parent->cg.prop.child == 0)
521 	  || !parent->cg.print_flag
522 	  || (line_granularity && ! parent->is_func))
523 	continue;
524 
525       if (!parent->name && parent->cg.cyc.num != 0)
526 	{
527 	  /* Cycle header.  */
528 	  print_cycle (parent);
529 	  print_members (parent);
530 	}
531       else
532 	{
533 	  print_parents (parent);
534 	  print_line (parent);
535 	  print_children (parent);
536 	}
537 
538       if (bsd_style_output)
539 	printf ("\n");
540 
541       printf ("-----------------------------------------------\n");
542 
543       if (bsd_style_output)
544 	printf ("\n");
545     }
546 
547   free (timesortsym);
548 
549   if (print_descriptions && !bsd_style_output)
550     fsf_callg_blurb (stdout);
551 }
552 
553 
554 static int
555 cmp_name (const PTR left, const PTR right)
556 {
557   const Sym **npp1 = (const Sym **) left;
558   const Sym **npp2 = (const Sym **) right;
559 
560   return strcmp ((*npp1)->name, (*npp2)->name);
561 }
562 
563 
564 void
565 cg_print_index ()
566 {
567   unsigned int sym_index;
568   unsigned int nnames, todo, i, j;
569   int col, starting_col;
570   Sym **name_sorted_syms, *sym;
571   const char *filename;
572   char buf[20];
573   int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
574 
575   /* Now, sort regular function name
576      alphabetically to create an index.  */
577   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
578 
579   for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
580     {
581       if (ignore_zeros && symtab.base[sym_index].ncalls == 0
582 	  && symtab.base[sym_index].hist.time == 0)
583 	continue;
584 
585       name_sorted_syms[nnames++] = &symtab.base[sym_index];
586     }
587 
588   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
589 
590   for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
591     name_sorted_syms[todo++] = &cycle_header[sym_index];
592 
593   printf ("\f\n");
594   printf (_("Index by function name\n\n"));
595   sym_index = (todo + 2) / 3;
596 
597   for (i = 0; i < sym_index; i++)
598     {
599       col = 0;
600       starting_col = 0;
601 
602       for (j = i; j < todo; j += sym_index)
603 	{
604 	  sym = name_sorted_syms[j];
605 
606 	  if (sym->cg.print_flag)
607 	    sprintf (buf, "[%d]", sym->cg.index);
608 	  else
609 	    sprintf (buf, "(%d)", sym->cg.index);
610 
611 	  if (j < nnames)
612 	    {
613 	      if (bsd_style_output)
614 		{
615 		  printf ("%6.6s %-19.19s", buf, sym->name);
616 		}
617 	      else
618 		{
619 		  col += strlen (buf);
620 
621 		  for (; col < starting_col + 5; ++col)
622 		    putchar (' ');
623 
624 		  printf (" %s ", buf);
625 		  col += print_name_only (sym);
626 
627 		  if (!line_granularity && sym->is_static && sym->file)
628 		    {
629 		      filename = sym->file->name;
630 
631 		      if (!print_path)
632 			{
633 			  filename = strrchr (filename, '/');
634 
635 			  if (filename)
636 			    ++filename;
637 			  else
638 			    filename = sym->file->name;
639 			}
640 
641 		      printf (" (%s)", filename);
642 		      col += strlen (filename) + 3;
643 		    }
644 		}
645 	    }
646 	  else
647 	    {
648 	      if (bsd_style_output)
649 		{
650 		  printf ("%6.6s ", buf);
651 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
652 		  printf ("%-19.19s", buf);
653 		}
654 	      else
655 		{
656 		  col += strlen (buf);
657 		  for (; col < starting_col + 5; ++col)
658 		    putchar (' ');
659 		  printf (" %s ", buf);
660 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
661 		  printf ("%s", buf);
662 		  col += strlen (buf);
663 		}
664 	    }
665 
666 	  starting_col += column_width;
667 	}
668 
669       printf ("\n");
670     }
671 
672   free (name_sorted_syms);
673 }
674 
675 /* Compare two arcs based on their usage counts.
676    We want to sort in descending order.  */
677 
678 static int
679 cmp_arc_count (const PTR left, const PTR right)
680 {
681   const Arc **npp1 = (const Arc **) left;
682   const Arc **npp2 = (const Arc **) right;
683 
684   if ((*npp1)->count > (*npp2)->count)
685     return -1;
686   else if ((*npp1)->count < (*npp2)->count)
687     return 1;
688   else
689     return 0;
690 }
691 
692 /* Compare two funtions based on their usage counts.
693    We want to sort in descending order.  */
694 
695 static int
696 cmp_fun_nuses (const PTR left, const PTR right)
697 {
698   const Sym **npp1 = (const Sym **) left;
699   const Sym **npp2 = (const Sym **) right;
700 
701   if ((*npp1)->nuses > (*npp2)->nuses)
702     return -1;
703   else if ((*npp1)->nuses < (*npp2)->nuses)
704     return 1;
705   else
706     return 0;
707 }
708 
709 /* Print a suggested function ordering based on the profiling data.
710 
711    We perform 4 major steps when ordering functions:
712 
713 	* Group unused functions together and place them at the
714 	end of the function order.
715 
716 	* Search the highest use arcs (those which account for 90% of
717 	the total arc count) for functions which have several parents.
718 
719 	Group those with the most call sites together (currently the
720 	top 1.25% which have at least five different call sites).
721 
722 	These are emitted at the start of the function order.
723 
724 	* Use a greedy placement algorithm to place functions which
725 	occur in the top 99% of the arcs in the profile.  Some provisions
726 	are made to handle high usage arcs where the parent and/or
727 	child has already been placed.
728 
729 	* Run the same greedy placement algorithm on the remaining
730 	arcs to place the leftover functions.
731 
732 
733    The various "magic numbers" should (one day) be tuneable by command
734    line options.  They were arrived at by benchmarking a few applications
735    with various values to see which values produced better overall function
736    orderings.
737 
738    Of course, profiling errors, machine limitations (PA long calls), and
739    poor cutoff values for the placement algorithm may limit the usefullness
740    of the resulting function order.  Improvements would be greatly appreciated.
741 
742    Suggestions:
743 
744 	* Place the functions with many callers near the middle of the
745 	list to reduce long calls.
746 
747 	* Propagate arc usage changes as functions are placed.  Ie if
748 	func1 and func2 are placed together, arcs to/from those arcs
749 	to the same parent/child should be combined, then resort the
750 	arcs to choose the next one.
751 
752 	* Implement some global positioning algorithm to place the
753 	chains made by the greedy local positioning algorithm.  Probably
754 	by examining arcs which haven't been placed yet to tie two
755 	chains together.
756 
757 	* Take a function's size and time into account in the algorithm;
758 	size in particular is important on the PA (long calls).  Placing
759 	many small functions onto their own page may be wise.
760 
761 	* Use better profiling information; many published algorithms
762 	are based on call sequences through time, rather than just
763 	arc counts.
764 
765 	* Prodecure cloning could improve performance when a small number
766 	of arcs account for most of the calls to a particular function.
767 
768 	* Use relocation information to avoid moving unused functions
769 	completely out of the code stream; this would avoid severe lossage
770 	when the profile data bears little resemblance to actual runs.
771 
772 	* Propagation of arc usages should also improve .o link line
773 	ordering which shares the same arc placement algorithm with
774 	the function ordering code (in fact it is a degenerate case
775 	of function ordering).  */
776 
777 void
778 cg_print_function_ordering (void)
779 {
780   unsigned long sym_index;
781   unsigned long arc_index;
782   unsigned long used, unused, scratch_index;
783   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
784 #ifdef __GNUC__
785   unsigned long long total_arcs, tmp_arcs_count;
786 #else
787   unsigned long total_arcs, tmp_arcs_count;
788 #endif
789   Sym **unused_syms, **used_syms, **scratch_syms;
790   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
791 
792   sym_index = 0;
793   used = 0;
794   unused = 0;
795   scratch_index = 0;
796   unplaced_arc_count = 0;
797   high_arc_count = 0;
798   scratch_arc_count = 0;
799 
800   /* First group all the unused functions together.  */
801   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
804   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
807 
808   /* Walk through all the functions; mark those which are never
809      called as placed (we'll emit them as a group later).  */
810   for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
811     {
812       if (symtab.base[sym_index].ncalls == 0)
813 	{
814 	  unused_syms[unused++] = &symtab.base[sym_index];
815 	  symtab.base[sym_index].has_been_placed = 1;
816 	}
817       else
818 	{
819 	  used_syms[used++] = &symtab.base[sym_index];
820 	  symtab.base[sym_index].has_been_placed = 0;
821 	  symtab.base[sym_index].next = 0;
822 	  symtab.base[sym_index].prev = 0;
823 	  symtab.base[sym_index].nuses = 0;
824 	}
825     }
826 
827   /* Sort the arcs from most used to least used.  */
828   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
829 
830   /* Compute the total arc count.  Also mark arcs as unplaced.
831 
832      Note we don't compensate for overflow if that happens!
833      Overflow is much less likely when this file is compiled
834      with GCC as it can double-wide integers via long long.  */
835   total_arcs = 0;
836   for (arc_index = 0; arc_index < numarcs; arc_index++)
837     {
838       total_arcs += arcs[arc_index]->count;
839       arcs[arc_index]->has_been_placed = 0;
840     }
841 
842   /* We want to pull out those functions which are referenced
843      by many highly used arcs and emit them as a group.  This
844      could probably use some tuning.  */
845   tmp_arcs_count = 0;
846   for (arc_index = 0; arc_index < numarcs; arc_index++)
847     {
848       tmp_arcs_count += arcs[arc_index]->count;
849 
850       /* Count how many times each parent and child are used up
851 	 to our threshhold of arcs (90%).  */
852       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
853 	break;
854 
855       arcs[arc_index]->child->nuses++;
856     }
857 
858   /* Now sort a temporary symbol table based on the number of
859      times each function was used in the highest used arcs.  */
860   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
861   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
862 
863   /* Now pick out those symbols we're going to emit as
864      a group.  We take up to 1.25% of the used symbols.  */
865   for (sym_index = 0; sym_index < used / 80; sym_index++)
866     {
867       Sym *sym = scratch_syms[sym_index];
868       Arc *arc;
869 
870       /* If we hit symbols that aren't used from many call sites,
871 	 then we can quit.  We choose five as the low limit for
872 	 no particular reason.  */
873       if (sym->nuses == 5)
874 	break;
875 
876       /* We're going to need the arcs between these functions.
877 	 Unfortunately, we don't know all these functions
878 	 until we're done.  So we keep track of all the arcs
879 	 to the functions we care about, then prune out those
880 	 which are uninteresting.
881 
882 	 An interesting variation would be to quit when we found
883 	 multi-call site functions which account for some percentage
884 	 of the arcs.  */
885       arc = sym->cg.children;
886 
887       while (arc)
888 	{
889 	  if (arc->parent != arc->child)
890 	    scratch_arcs[scratch_arc_count++] = arc;
891 	  arc->has_been_placed = 1;
892 	  arc = arc->next_child;
893 	}
894 
895       arc = sym->cg.parents;
896 
897       while (arc)
898 	{
899 	  if (arc->parent != arc->child)
900 	    scratch_arcs[scratch_arc_count++] = arc;
901 	  arc->has_been_placed = 1;
902 	  arc = arc->next_parent;
903 	}
904 
905       /* Keep track of how many symbols we're going to place.  */
906       scratch_index = sym_index;
907 
908       /* A lie, but it makes identifying
909 	 these functions easier later.  */
910       sym->has_been_placed = 1;
911     }
912 
913   /* Now walk through the temporary arcs and copy
914      those we care about into the high arcs array.  */
915   for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
916     {
917       Arc *arc = scratch_arcs[arc_index];
918 
919       /* If this arc refers to highly used functions, then
920 	 then we want to keep it.  */
921       if (arc->child->has_been_placed
922 	  && arc->parent->has_been_placed)
923 	{
924 	  high_arcs[high_arc_count++] = scratch_arcs[arc_index];
925 
926 	  /* We need to turn of has_been_placed since we're going to
927 	     use the main arc placement algorithm on these arcs.  */
928 	  arc->child->has_been_placed = 0;
929 	  arc->parent->has_been_placed = 0;
930 	}
931     }
932 
933   /* Dump the multi-site high usage functions which are not
934      going to be ordered by the main ordering algorithm.  */
935   for (sym_index = 0; sym_index < scratch_index; sym_index++)
936     {
937       if (scratch_syms[sym_index]->has_been_placed)
938 	printf ("%s\n", scratch_syms[sym_index]->name);
939     }
940 
941   /* Now we can order the multi-site high use
942      functions based on the arcs between them.  */
943   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
944   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
945 				    unplaced_arcs, &unplaced_arc_count);
946 
947   /* Order and dump the high use functions left,
948      these typically have only a few call sites.  */
949   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
950 				    unplaced_arcs, &unplaced_arc_count);
951 
952   /* Now place the rarely used functions.  */
953   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
954 				    scratch_arcs, &scratch_arc_count);
955 
956   /* Output any functions not emitted by the order_and_dump calls.  */
957   for (sym_index = 0; sym_index < used; sym_index++)
958     if (used_syms[sym_index]->has_been_placed == 0)
959       printf("%s\n", used_syms[sym_index]->name);
960 
961   /* Output the unused functions.  */
962   for (sym_index = 0; sym_index < unused; sym_index++)
963     printf("%s\n", unused_syms[sym_index]->name);
964 
965   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
968   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
971 
972   free (unused_syms);
973   free (used_syms);
974   free (scratch_syms);
975   free (high_arcs);
976   free (scratch_arcs);
977   free (unplaced_arcs);
978 }
979 
980 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
981    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
982 
983    If ALL is nonzero, then place all functions referenced by THE_ARCS,
984    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
985 
986 #define MOST 0.99
987 static void
988 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
989 				  unplaced_arcs, unplaced_arc_count)
990      Arc **the_arcs;
991      unsigned long arc_count;
992      int all;
993      Arc **unplaced_arcs;
994      unsigned long *unplaced_arc_count;
995 {
996 #ifdef __GNUC__
997   unsigned long long tmp_arcs, total_arcs;
998 #else
999   unsigned long tmp_arcs, total_arcs;
1000 #endif
1001   unsigned int arc_index;
1002 
1003   /* If needed, compute the total arc count.
1004 
1005      Note we don't compensate for overflow if that happens!  */
1006   if (! all)
1007     {
1008       total_arcs = 0;
1009       for (arc_index = 0; arc_index < arc_count; arc_index++)
1010 	total_arcs += the_arcs[arc_index]->count;
1011     }
1012   else
1013     total_arcs = 0;
1014 
1015   tmp_arcs = 0;
1016 
1017   for (arc_index = 0; arc_index < arc_count; arc_index++)
1018     {
1019       Sym *sym1, *sym2;
1020       Sym *child, *parent;
1021 
1022       tmp_arcs += the_arcs[arc_index]->count;
1023 
1024       /* Ignore this arc if it's already been placed.  */
1025       if (the_arcs[arc_index]->has_been_placed)
1026 	continue;
1027 
1028       child = the_arcs[arc_index]->child;
1029       parent = the_arcs[arc_index]->parent;
1030 
1031       /* If we're not using all arcs, and this is a rarely used
1032 	 arc, then put it on the unplaced_arc list.  Similarly
1033 	 if both the parent and child of this arc have been placed.  */
1034       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1035 	  || child->has_been_placed || parent->has_been_placed)
1036 	{
1037 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1038 	  continue;
1039 	}
1040 
1041       /* If all slots in the parent and child are full, then there isn't
1042 	 anything we can do right now.  We'll place this arc on the
1043 	 unplaced arc list in the hope that a global positioning
1044 	 algorithm can use it to place function chains.  */
1045       if (parent->next && parent->prev && child->next && child->prev)
1046 	{
1047 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1048 	  continue;
1049 	}
1050 
1051       /* If the parent is unattached, then find the closest
1052 	 place to attach it onto child's chain.   Similarly
1053 	 for the opposite case.  */
1054       if (!parent->next && !parent->prev)
1055 	{
1056 	  int next_count = 0;
1057 	  int prev_count = 0;
1058 	  Sym *prev = child;
1059 	  Sym *next = child;
1060 
1061 	  /* Walk to the beginning and end of the child's chain.  */
1062 	  while (next->next)
1063 	    {
1064 	      next = next->next;
1065 	      next_count++;
1066 	    }
1067 
1068 	  while (prev->prev)
1069 	    {
1070 	      prev = prev->prev;
1071 	      prev_count++;
1072 	    }
1073 
1074 	  /* Choose the closest.  */
1075 	  child = next_count < prev_count ? next : prev;
1076 	}
1077       else if (! child->next && !child->prev)
1078 	{
1079 	  int next_count = 0;
1080 	  int prev_count = 0;
1081 	  Sym *prev = parent;
1082 	  Sym *next = parent;
1083 
1084 	  while (next->next)
1085 	    {
1086 	      next = next->next;
1087 	      next_count++;
1088 	    }
1089 
1090 	  while (prev->prev)
1091 	    {
1092 	      prev = prev->prev;
1093 	      prev_count++;
1094 	    }
1095 
1096 	  parent = prev_count < next_count ? prev : next;
1097 	}
1098       else
1099 	{
1100 	  /* Couldn't find anywhere to attach the functions,
1101 	     put the arc on the unplaced arc list.  */
1102 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1103 	  continue;
1104 	}
1105 
1106       /* Make sure we don't tie two ends together.  */
1107       sym1 = parent;
1108       if (sym1->next)
1109 	while (sym1->next)
1110 	  sym1 = sym1->next;
1111       else
1112 	while (sym1->prev)
1113 	  sym1 = sym1->prev;
1114 
1115       sym2 = child;
1116       if (sym2->next)
1117 	while (sym2->next)
1118 	  sym2 = sym2->next;
1119       else
1120 	while (sym2->prev)
1121 	  sym2 = sym2->prev;
1122 
1123       if (sym1 == child
1124 	  && sym2 == parent)
1125 	{
1126 	  /* This would tie two ends together.  */
1127 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1128 	  continue;
1129 	}
1130 
1131       if (parent->next)
1132 	{
1133 	  /* Must attach to the parent's prev field.  */
1134 	  if (! child->next)
1135 	    {
1136 	      /* parent-prev and child-next */
1137 	      parent->prev = child;
1138 	      child->next = parent;
1139 	      the_arcs[arc_index]->has_been_placed = 1;
1140 	    }
1141 	}
1142       else if (parent->prev)
1143 	{
1144 	  /* Must attach to the parent's next field.  */
1145 	  if (! child->prev)
1146 	    {
1147 	      /* parent-next and child-prev */
1148 	      parent->next = child;
1149 	      child->prev = parent;
1150 	      the_arcs[arc_index]->has_been_placed = 1;
1151 	    }
1152 	}
1153       else
1154 	{
1155 	  /* Can attach to either field in the parent, depends
1156 	     on where we've got space in the child.  */
1157 	  if (child->prev)
1158 	    {
1159 	      /* parent-prev and child-next.  */
1160 	      parent->prev = child;
1161 	      child->next = parent;
1162 	      the_arcs[arc_index]->has_been_placed = 1;
1163 	    }
1164 	  else
1165 	    {
1166 	      /* parent-next and child-prev.  */
1167 	      parent->next = child;
1168 	      child->prev = parent;
1169 	      the_arcs[arc_index]->has_been_placed = 1;
1170 	    }
1171 	}
1172     }
1173 
1174   /* Dump the chains of functions we've made.  */
1175   for (arc_index = 0; arc_index < arc_count; arc_index++)
1176     {
1177       Sym *sym;
1178       if (the_arcs[arc_index]->parent->has_been_placed
1179 	  || the_arcs[arc_index]->child->has_been_placed)
1180 	continue;
1181 
1182       sym = the_arcs[arc_index]->parent;
1183 
1184       /* If this symbol isn't attached to any other
1185 	 symbols, then we've got a rarely used arc.
1186 
1187 	 Skip it for now, we'll deal with them later.  */
1188       if (sym->next == NULL
1189 	  && sym->prev == NULL)
1190 	continue;
1191 
1192       /* Get to the start of this chain.  */
1193       while (sym->prev)
1194 	sym = sym->prev;
1195 
1196       while (sym)
1197 	{
1198 	  /* Mark it as placed.  */
1199 	  sym->has_been_placed = 1;
1200 	  printf ("%s\n", sym->name);
1201 	  sym = sym->next;
1202 	}
1203     }
1204 
1205   /* If we want to place all the arcs, then output
1206      those which weren't placed by the main algorithm.  */
1207   if (all)
1208     for (arc_index = 0; arc_index < arc_count; arc_index++)
1209       {
1210 	Sym *sym;
1211 	if (the_arcs[arc_index]->parent->has_been_placed
1212 	    || the_arcs[arc_index]->child->has_been_placed)
1213 	  continue;
1214 
1215 	sym = the_arcs[arc_index]->parent;
1216 
1217 	sym->has_been_placed = 1;
1218 	printf ("%s\n", sym->name);
1219       }
1220 }
1221 
1222 /* Compare two function_map structs based on file name.
1223    We want to sort in ascending order.  */
1224 
1225 static int
1226 cmp_symbol_map (const void * l, const void * r)
1227 {
1228   return filename_cmp (((struct function_map *) l)->file_name,
1229 		       ((struct function_map *) r)->file_name);
1230 }
1231 
1232 /* Print a suggested .o ordering for files on a link line based
1233    on profiling information.  This uses the function placement
1234    code for the bulk of its work.  */
1235 
1236 void
1237 cg_print_file_ordering (void)
1238 {
1239   unsigned long scratch_arc_count;
1240   unsigned long arc_index;
1241   unsigned long sym_index;
1242   Arc **scratch_arcs;
1243   char *last;
1244 
1245   scratch_arc_count = 0;
1246 
1247   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1248   for (arc_index = 0; arc_index < numarcs; arc_index++)
1249     {
1250       if (! arcs[arc_index]->parent->mapped
1251 	  || ! arcs[arc_index]->child->mapped)
1252 	arcs[arc_index]->has_been_placed = 1;
1253     }
1254 
1255   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1256 				    scratch_arcs, &scratch_arc_count);
1257 
1258   /* Output .o's not handled by the main placement algorithm.  */
1259   for (sym_index = 0; sym_index < symtab.len; sym_index++)
1260     {
1261       if (symtab.base[sym_index].mapped
1262 	  && ! symtab.base[sym_index].has_been_placed)
1263 	printf ("%s\n", symtab.base[sym_index].name);
1264     }
1265 
1266   qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1267 
1268   /* Now output any .o's that didn't have any text symbols.  */
1269   last = NULL;
1270   for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1271     {
1272       unsigned int index2;
1273 
1274       /* Don't bother searching if this symbol
1275 	 is the same as the previous one.  */
1276       if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1277 	continue;
1278 
1279       for (index2 = 0; index2 < symtab.len; index2++)
1280 	{
1281 	  if (! symtab.base[index2].mapped)
1282 	    continue;
1283 
1284 	  if (!filename_cmp (symtab.base[index2].name,
1285 			     symbol_map[sym_index].file_name))
1286 	    break;
1287 	}
1288 
1289       /* If we didn't find it in the symbol table, then it must
1290 	 be a .o with no text symbols.  Output it last.  */
1291       if (index2 == symtab.len)
1292 	printf ("%s\n", symbol_map[sym_index].file_name);
1293       last = symbol_map[sym_index].file_name;
1294     }
1295 }
1296