xref: /openbsd-src/gnu/usr.bin/binutils-2.17/gprof/hist.c (revision 3d8817e467ea46cf4772788d6804dd293abfb01a)
1 /* hist.c  -  Histogram related operations.
2 
3    Copyright 1999, 2000, 2001, 2002, 2004, 2005
4    Free Software Foundation, Inc.
5 
6    This file is part of GNU Binutils.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2 of the License, or
11    (at your option) any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21    02110-1301, USA.  */
22 
23 #include "libiberty.h"
24 #include "gprof.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "corefile.h"
29 #include "gmon_io.h"
30 #include "gmon_out.h"
31 #include "hist.h"
32 #include "sym_ids.h"
33 #include "utils.h"
34 
35 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT))
36 
37 static void scale_and_align_entries (void);
38 static void print_header (int);
39 static void print_line (Sym *, double);
40 static int cmp_time (const PTR, const PTR);
41 
42 /* Declarations of automatically generated functions to output blurbs.  */
43 extern void flat_blurb (FILE * fp);
44 
45 bfd_vma s_lowpc;		/* Lowest address in .text.  */
46 bfd_vma s_highpc = 0;		/* Highest address in .text.  */
47 bfd_vma lowpc, highpc;		/* Same, but expressed in UNITs.  */
48 unsigned int hist_num_bins = 0;	/* Number of histogram samples.  */
49 int *hist_sample = 0;		/* Histogram samples (shorts in the file!).  */
50 double hist_scale;
51 static char hist_dimension[16] = "seconds";
52 static char hist_dimension_abbrev = 's';
53 
54 static double accum_time;	/* Accumulated time so far for print_line(). */
55 static double total_time;	/* Total time for all routines.  */
56 
57 /* Table of SI prefixes for powers of 10 (used to automatically
58    scale some of the values in the flat profile).  */
59 const struct
60   {
61     char prefix;
62     double scale;
63   }
64 SItab[] =
65 {
66   { 'T', 1e-12 },				/* tera */
67   { 'G', 1e-09 },				/* giga */
68   { 'M', 1e-06 },				/* mega */
69   { 'K', 1e-03 },				/* kilo */
70   { ' ', 1e-00 },
71   { 'm', 1e+03 },				/* milli */
72   { 'u', 1e+06 },				/* micro */
73   { 'n', 1e+09 },				/* nano */
74   { 'p', 1e+12 },				/* pico */
75   { 'f', 1e+15 },				/* femto */
76   { 'a', 1e+18 }				/* ato */
77 };
78 
79 
80 /* Read the histogram from file IFP.  FILENAME is the name of IFP and
81    is provided for formatting error messages only.  */
82 
83 void
hist_read_rec(FILE * ifp,const char * filename)84 hist_read_rec (FILE * ifp, const char *filename)
85 {
86   bfd_vma n_lowpc, n_highpc;
87   unsigned int i, ncnt, profrate;
88   UNIT count;
89 
90   if (gmon_io_read_vma (ifp, &n_lowpc)
91       || gmon_io_read_vma (ifp, &n_highpc)
92       || gmon_io_read_32 (ifp, &ncnt)
93       || gmon_io_read_32 (ifp, &profrate)
94       || gmon_io_read (ifp, hist_dimension, 15)
95       || gmon_io_read (ifp, &hist_dimension_abbrev, 1))
96     {
97       fprintf (stderr, _("%s: %s: unexpected end of file\n"),
98 	       whoami, filename);
99 
100       done (1);
101     }
102 
103   if (!s_highpc)
104     {
105       /* This is the first histogram record.  */
106       s_lowpc = n_lowpc;
107       s_highpc = n_highpc;
108       lowpc = (bfd_vma) n_lowpc / sizeof (UNIT);
109       highpc = (bfd_vma) n_highpc / sizeof (UNIT);
110       hist_num_bins = ncnt;
111       hz = profrate;
112     }
113 
114   DBG (SAMPLEDEBUG,
115        printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n",
116 	       (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt);
117        printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %u\n",
118 	       (unsigned long) s_lowpc, (unsigned long) s_highpc,
119 	       hist_num_bins);
120        printf ("[hist_read_rec]   lowpc 0x%lx   highpc 0x%lx\n",
121 	       (unsigned long) lowpc, (unsigned long) highpc));
122 
123   if (n_lowpc != s_lowpc || n_highpc != s_highpc
124       || ncnt != hist_num_bins || hz != (int) profrate)
125     {
126       fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"),
127 	       whoami, filename);
128       done (1);
129     }
130 
131   if (!hist_sample)
132     {
133       hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0]));
134       memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0]));
135     }
136 
137   for (i = 0; i < hist_num_bins; ++i)
138     {
139       if (fread (&count[0], sizeof (count), 1, ifp) != 1)
140 	{
141 	  fprintf (stderr,
142 		  _("%s: %s: unexpected EOF after reading %u of %u samples\n"),
143 		   whoami, filename, i, hist_num_bins);
144 	  done (1);
145 	}
146       hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]);
147       DBG (SAMPLEDEBUG,
148 	   printf ("[hist_read_rec] 0x%lx: %u\n",
149 		   (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt),
150 		   hist_sample[i]));
151     }
152 }
153 
154 
155 /* Write execution histogram to file OFP.  FILENAME is the name
156    of OFP and is provided for formatting error-messages only.  */
157 
158 void
hist_write_hist(FILE * ofp,const char * filename)159 hist_write_hist (FILE * ofp, const char *filename)
160 {
161   UNIT count;
162   unsigned int i;
163 
164   /* Write header.  */
165 
166   if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST)
167       || gmon_io_write_vma (ofp, s_lowpc)
168       || gmon_io_write_vma (ofp, s_highpc)
169       || gmon_io_write_32 (ofp, hist_num_bins)
170       || gmon_io_write_32 (ofp, hz)
171       || gmon_io_write (ofp, hist_dimension, 15)
172       || gmon_io_write (ofp, &hist_dimension_abbrev, 1))
173     {
174       perror (filename);
175       done (1);
176     }
177 
178   for (i = 0; i < hist_num_bins; ++i)
179     {
180       bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]);
181 
182       if (fwrite (&count[0], sizeof (count), 1, ofp) != 1)
183 	{
184 	  perror (filename);
185 	  done (1);
186 	}
187     }
188 }
189 
190 
191 /* Calculate scaled entry point addresses (to save time in
192    hist_assign_samples), and, on architectures that have procedure
193    entry masks at the start of a function, possibly push the scaled
194    entry points over the procedure entry mask, if it turns out that
195    the entry point is in one bin and the code for a routine is in the
196    next bin.  */
197 
198 static void
scale_and_align_entries()199 scale_and_align_entries ()
200 {
201   Sym *sym;
202   bfd_vma bin_of_entry;
203   bfd_vma bin_of_code;
204 
205   for (sym = symtab.base; sym < symtab.limit; sym++)
206     {
207       sym->hist.scaled_addr = sym->addr / sizeof (UNIT);
208       bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale;
209       bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc)
210 		     / hist_scale);
211       if (bin_of_entry < bin_of_code)
212 	{
213 	  DBG (SAMPLEDEBUG,
214 	       printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n",
215 		       (unsigned long) sym->hist.scaled_addr,
216 		       (unsigned long) (sym->hist.scaled_addr
217 					+ UNITS_TO_CODE)));
218 	  sym->hist.scaled_addr += UNITS_TO_CODE;
219 	}
220     }
221 }
222 
223 
224 /* Assign samples to the symbol to which they belong.
225 
226    Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC)
227    which may overlap one more symbol address ranges.  If a symbol
228    overlaps with the bin's address range by O percent, then O percent
229    of the bin's count is credited to that symbol.
230 
231    There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be
232    with respect to the symbol's address range [SYM_LOW_PC,
233    SYM_HIGH_PC) as shown in the following diagram.  OVERLAP computes
234    the distance (in UNITs) between the arrows, the fraction of the
235    sample that is to be credited to the symbol which starts at
236    SYM_LOW_PC.
237 
238 	  sym_low_pc                                      sym_high_pc
239 	       |                                               |
240 	       v                                               v
241 
242 	       +-----------------------------------------------+
243 	       |                                               |
244 	  |  ->|    |<-         ->|         |<-         ->|    |<-  |
245 	  |         |             |         |             |         |
246 	  +---------+             +---------+             +---------+
247 
248 	  ^         ^             ^         ^             ^         ^
249 	  |         |             |         |             |         |
250      bin_low_pc bin_high_pc  bin_low_pc bin_high_pc  bin_low_pc bin_high_pc
251 
252    For the VAX we assert that samples will never fall in the first two
253    bytes of any routine, since that is the entry mask, thus we call
254    scale_and_align_entries() to adjust the entry points if the entry
255    mask falls in one bin but the code for the routine doesn't start
256    until the next bin.  In conjunction with the alignment of routine
257    addresses, this should allow us to have only one sample for every
258    four bytes of text space and never have any overlap (the two end
259    cases, above).  */
260 
261 void
hist_assign_samples()262 hist_assign_samples ()
263 {
264   bfd_vma bin_low_pc, bin_high_pc;
265   bfd_vma sym_low_pc, sym_high_pc;
266   bfd_vma overlap, addr;
267   unsigned int bin_count;
268   unsigned int i, j;
269   double time, credit;
270 
271   /* Read samples and assign to symbols.  */
272   hist_scale = highpc - lowpc;
273   hist_scale /= hist_num_bins;
274   scale_and_align_entries ();
275 
276   /* Iterate over all sample bins.  */
277   for (i = 0, j = 1; i < hist_num_bins; ++i)
278     {
279       bin_count = hist_sample[i];
280       if (! bin_count)
281 	continue;
282 
283       bin_low_pc = lowpc + (bfd_vma) (hist_scale * i);
284       bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1));
285       time = bin_count;
286 
287       DBG (SAMPLEDEBUG,
288 	   printf (
289       "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n",
290 		    (unsigned long) (sizeof (UNIT) * bin_low_pc),
291 		    (unsigned long) (sizeof (UNIT) * bin_high_pc),
292 		    bin_count));
293       total_time += time;
294 
295       /* Credit all symbols that are covered by bin I.  */
296       for (j = j - 1; j < symtab.len; ++j)
297 	{
298 	  sym_low_pc = symtab.base[j].hist.scaled_addr;
299 	  sym_high_pc = symtab.base[j + 1].hist.scaled_addr;
300 
301 	  /* If high end of bin is below entry address,
302 	     go for next bin.  */
303 	  if (bin_high_pc < sym_low_pc)
304 	    break;
305 
306 	  /* If low end of bin is above high end of symbol,
307 	     go for next symbol.  */
308 	  if (bin_low_pc >= sym_high_pc)
309 	    continue;
310 
311 	  overlap =
312 	    MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc);
313 	  if (overlap > 0)
314 	    {
315 	      DBG (SAMPLEDEBUG,
316 		   printf (
317 	       "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n",
318 			   (unsigned long) symtab.base[j].addr,
319 			   (unsigned long) (sizeof (UNIT) * sym_high_pc),
320 			   symtab.base[j].name, overlap * time / hist_scale,
321 			   (long) overlap));
322 
323 	      addr = symtab.base[j].addr;
324 	      credit = overlap * time / hist_scale;
325 
326 	      /* Credit symbol if it appears in INCL_FLAT or that
327 		 table is empty and it does not appear it in
328 		 EXCL_FLAT.  */
329 	      if (sym_lookup (&syms[INCL_FLAT], addr)
330 		  || (syms[INCL_FLAT].len == 0
331 		      && !sym_lookup (&syms[EXCL_FLAT], addr)))
332 		{
333 		  symtab.base[j].hist.time += credit;
334 		}
335 	      else
336 		{
337 		  total_time -= credit;
338 		}
339 	    }
340 	}
341     }
342 
343   DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n",
344 			    total_time));
345 }
346 
347 
348 /* Print header for flag histogram profile.  */
349 
350 static void
print_header(int prefix)351 print_header (int prefix)
352 {
353   char unit[64];
354 
355   sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev);
356 
357   if (bsd_style_output)
358     {
359       printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
360 	      (long) hist_scale * sizeof (UNIT));
361       if (total_time > 0.0)
362 	{
363 	  printf (_(" for %.2f%% of %.2f %s\n\n"),
364 		  100.0 / total_time, total_time / hz, hist_dimension);
365 	}
366     }
367   else
368     {
369       printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension);
370     }
371 
372   if (total_time <= 0.0)
373     {
374       printf (_(" no time accumulated\n\n"));
375 
376       /* This doesn't hurt since all the numerators will be zero.  */
377       total_time = 1.0;
378     }
379 
380   printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s  %-8.8s\n",
381 	  "%  ", _("cumulative"), _("self  "), "", _("self  "), _("total "),
382 	  "");
383   printf ("%5.5s %9.9s  %8.8s %8.8s %8.8s %8.8s  %-8.8s\n",
384 	  _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit,
385 	  _("name"));
386 }
387 
388 
389 static void
print_line(Sym * sym,double scale)390 print_line (Sym *sym, double scale)
391 {
392   if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0)
393     return;
394 
395   accum_time += sym->hist.time;
396 
397   if (bsd_style_output)
398     printf ("%5.1f %10.2f %8.2f",
399 	    total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
400 	    accum_time / hz, sym->hist.time / hz);
401   else
402     printf ("%6.2f %9.2f %8.2f",
403 	    total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0,
404 	    accum_time / hz, sym->hist.time / hz);
405 
406   if (sym->ncalls != 0)
407     printf (" %8lu %8.2f %8.2f  ",
408 	    sym->ncalls, scale * sym->hist.time / hz / sym->ncalls,
409 	    scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls);
410   else
411     printf (" %8.8s %8.8s %8.8s  ", "", "", "");
412 
413   if (bsd_style_output)
414     print_name (sym);
415   else
416     print_name_only (sym);
417 
418   printf ("\n");
419 }
420 
421 
422 /* Compare LP and RP.  The primary comparison key is execution time,
423    the secondary is number of invocation, and the tertiary is the
424    lexicographic order of the function names.  */
425 
426 static int
cmp_time(const PTR lp,const PTR rp)427 cmp_time (const PTR lp, const PTR rp)
428 {
429   const Sym *left = *(const Sym **) lp;
430   const Sym *right = *(const Sym **) rp;
431   double time_diff;
432 
433   time_diff = right->hist.time - left->hist.time;
434 
435   if (time_diff > 0.0)
436     return 1;
437 
438   if (time_diff < 0.0)
439     return -1;
440 
441   if (right->ncalls > left->ncalls)
442     return 1;
443 
444   if (right->ncalls < left->ncalls)
445     return -1;
446 
447   return strcmp (left->name, right->name);
448 }
449 
450 
451 /* Print the flat histogram profile.  */
452 
453 void
hist_print()454 hist_print ()
455 {
456   Sym **time_sorted_syms, *top_dog, *sym;
457   unsigned int index;
458   unsigned log_scale;
459   double top_time, time;
460   bfd_vma addr;
461 
462   if (first_output)
463     first_output = FALSE;
464   else
465     printf ("\f\n");
466 
467   accum_time = 0.0;
468 
469   if (bsd_style_output)
470     {
471       if (print_descriptions)
472 	{
473 	  printf (_("\n\n\nflat profile:\n"));
474 	  flat_blurb (stdout);
475 	}
476     }
477   else
478     {
479       printf (_("Flat profile:\n"));
480     }
481 
482   /* Sort the symbol table by time (call-count and name as secondary
483      and tertiary keys).  */
484   time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
485 
486   for (index = 0; index < symtab.len; ++index)
487     time_sorted_syms[index] = &symtab.base[index];
488 
489   qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time);
490 
491   if (bsd_style_output)
492     {
493       log_scale = 5;		/* Milli-seconds is BSD-default.  */
494     }
495   else
496     {
497       /* Search for symbol with highest per-call
498 	 execution time and scale accordingly.  */
499       log_scale = 0;
500       top_dog = 0;
501       top_time = 0.0;
502 
503       for (index = 0; index < symtab.len; ++index)
504 	{
505 	  sym = time_sorted_syms[index];
506 
507 	  if (sym->ncalls != 0)
508 	    {
509 	      time = (sym->hist.time + sym->cg.child_time) / sym->ncalls;
510 
511 	      if (time > top_time)
512 		{
513 		  top_dog = sym;
514 		  top_time = time;
515 		}
516 	    }
517 	}
518 
519       if (top_dog && top_dog->ncalls != 0 && top_time > 0.0)
520 	{
521 	  top_time /= hz;
522 
523 	  for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++)
524 	    {
525 	      double scaled_value = SItab[log_scale].scale * top_time;
526 
527 	      if (scaled_value >= 1.0 && scaled_value < 1000.0)
528 		break;
529 	    }
530 	}
531     }
532 
533   /* For now, the dimension is always seconds.  In the future, we
534      may also want to support other (pseudo-)dimensions (such as
535      I-cache misses etc.).  */
536   print_header (SItab[log_scale].prefix);
537 
538   for (index = 0; index < symtab.len; ++index)
539     {
540       addr = time_sorted_syms[index]->addr;
541 
542       /* Print symbol if its in INCL_FLAT table or that table
543 	is empty and the symbol is not in EXCL_FLAT.  */
544       if (sym_lookup (&syms[INCL_FLAT], addr)
545 	  || (syms[INCL_FLAT].len == 0
546 	      && !sym_lookup (&syms[EXCL_FLAT], addr)))
547 	print_line (time_sorted_syms[index], SItab[log_scale].scale);
548     }
549 
550   free (time_sorted_syms);
551 
552   if (print_descriptions && !bsd_style_output)
553     flat_blurb (stdout);
554 }
555