1 /* hist.c - Histogram related operations. 2 3 Copyright 1999, 2000, 2001, 2002, 2004, 2005 4 Free Software Foundation, Inc. 5 6 This file is part of GNU Binutils. 7 8 This program is free software; you can redistribute it and/or modify 9 it under the terms of the GNU General Public License as published by 10 the Free Software Foundation; either version 2 of the License, or 11 (at your option) any later version. 12 13 This program is distributed in the hope that it will be useful, 14 but WITHOUT ANY WARRANTY; without even the implied warranty of 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 GNU General Public License for more details. 17 18 You should have received a copy of the GNU General Public License 19 along with this program; if not, write to the Free Software 20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 21 02110-1301, USA. */ 22 23 #include "libiberty.h" 24 #include "gprof.h" 25 #include "search_list.h" 26 #include "source.h" 27 #include "symtab.h" 28 #include "corefile.h" 29 #include "gmon_io.h" 30 #include "gmon_out.h" 31 #include "hist.h" 32 #include "sym_ids.h" 33 #include "utils.h" 34 35 #define UNITS_TO_CODE (offset_to_code / sizeof(UNIT)) 36 37 static void scale_and_align_entries (void); 38 static void print_header (int); 39 static void print_line (Sym *, double); 40 static int cmp_time (const PTR, const PTR); 41 42 /* Declarations of automatically generated functions to output blurbs. */ 43 extern void flat_blurb (FILE * fp); 44 45 bfd_vma s_lowpc; /* Lowest address in .text. */ 46 bfd_vma s_highpc = 0; /* Highest address in .text. */ 47 bfd_vma lowpc, highpc; /* Same, but expressed in UNITs. */ 48 unsigned int hist_num_bins = 0; /* Number of histogram samples. */ 49 int *hist_sample = 0; /* Histogram samples (shorts in the file!). */ 50 double hist_scale; 51 static char hist_dimension[16] = "seconds"; 52 static char hist_dimension_abbrev = 's'; 53 54 static double accum_time; /* Accumulated time so far for print_line(). */ 55 static double total_time; /* Total time for all routines. */ 56 57 /* Table of SI prefixes for powers of 10 (used to automatically 58 scale some of the values in the flat profile). */ 59 const struct 60 { 61 char prefix; 62 double scale; 63 } 64 SItab[] = 65 { 66 { 'T', 1e-12 }, /* tera */ 67 { 'G', 1e-09 }, /* giga */ 68 { 'M', 1e-06 }, /* mega */ 69 { 'K', 1e-03 }, /* kilo */ 70 { ' ', 1e-00 }, 71 { 'm', 1e+03 }, /* milli */ 72 { 'u', 1e+06 }, /* micro */ 73 { 'n', 1e+09 }, /* nano */ 74 { 'p', 1e+12 }, /* pico */ 75 { 'f', 1e+15 }, /* femto */ 76 { 'a', 1e+18 } /* ato */ 77 }; 78 79 80 /* Read the histogram from file IFP. FILENAME is the name of IFP and 81 is provided for formatting error messages only. */ 82 83 void 84 hist_read_rec (FILE * ifp, const char *filename) 85 { 86 bfd_vma n_lowpc, n_highpc; 87 unsigned int i, ncnt, profrate; 88 UNIT count; 89 90 if (gmon_io_read_vma (ifp, &n_lowpc) 91 || gmon_io_read_vma (ifp, &n_highpc) 92 || gmon_io_read_32 (ifp, &ncnt) 93 || gmon_io_read_32 (ifp, &profrate) 94 || gmon_io_read (ifp, hist_dimension, 15) 95 || gmon_io_read (ifp, &hist_dimension_abbrev, 1)) 96 { 97 fprintf (stderr, _("%s: %s: unexpected end of file\n"), 98 whoami, filename); 99 100 done (1); 101 } 102 103 if (!s_highpc) 104 { 105 /* This is the first histogram record. */ 106 s_lowpc = n_lowpc; 107 s_highpc = n_highpc; 108 lowpc = (bfd_vma) n_lowpc / sizeof (UNIT); 109 highpc = (bfd_vma) n_highpc / sizeof (UNIT); 110 hist_num_bins = ncnt; 111 hz = profrate; 112 } 113 114 DBG (SAMPLEDEBUG, 115 printf ("[hist_read_rec] n_lowpc 0x%lx n_highpc 0x%lx ncnt %u\n", 116 (unsigned long) n_lowpc, (unsigned long) n_highpc, ncnt); 117 printf ("[hist_read_rec] s_lowpc 0x%lx s_highpc 0x%lx nsamples %u\n", 118 (unsigned long) s_lowpc, (unsigned long) s_highpc, 119 hist_num_bins); 120 printf ("[hist_read_rec] lowpc 0x%lx highpc 0x%lx\n", 121 (unsigned long) lowpc, (unsigned long) highpc)); 122 123 if (n_lowpc != s_lowpc || n_highpc != s_highpc 124 || ncnt != hist_num_bins || hz != (int) profrate) 125 { 126 fprintf (stderr, _("%s: `%s' is incompatible with first gmon file\n"), 127 whoami, filename); 128 done (1); 129 } 130 131 if (!hist_sample) 132 { 133 hist_sample = (int *) xmalloc (hist_num_bins * sizeof (hist_sample[0])); 134 memset (hist_sample, 0, hist_num_bins * sizeof (hist_sample[0])); 135 } 136 137 for (i = 0; i < hist_num_bins; ++i) 138 { 139 if (fread (&count[0], sizeof (count), 1, ifp) != 1) 140 { 141 fprintf (stderr, 142 _("%s: %s: unexpected EOF after reading %u of %u samples\n"), 143 whoami, filename, i, hist_num_bins); 144 done (1); 145 } 146 hist_sample[i] += bfd_get_16 (core_bfd, (bfd_byte *) & count[0]); 147 DBG (SAMPLEDEBUG, 148 printf ("[hist_read_rec] 0x%lx: %u\n", 149 (unsigned long) (n_lowpc + i * (n_highpc - n_lowpc) / ncnt), 150 hist_sample[i])); 151 } 152 } 153 154 155 /* Write execution histogram to file OFP. FILENAME is the name 156 of OFP and is provided for formatting error-messages only. */ 157 158 void 159 hist_write_hist (FILE * ofp, const char *filename) 160 { 161 UNIT count; 162 unsigned int i; 163 164 /* Write header. */ 165 166 if (gmon_io_write_8 (ofp, GMON_TAG_TIME_HIST) 167 || gmon_io_write_vma (ofp, s_lowpc) 168 || gmon_io_write_vma (ofp, s_highpc) 169 || gmon_io_write_32 (ofp, hist_num_bins) 170 || gmon_io_write_32 (ofp, hz) 171 || gmon_io_write (ofp, hist_dimension, 15) 172 || gmon_io_write (ofp, &hist_dimension_abbrev, 1)) 173 { 174 perror (filename); 175 done (1); 176 } 177 178 for (i = 0; i < hist_num_bins; ++i) 179 { 180 bfd_put_16 (core_bfd, (bfd_vma) hist_sample[i], (bfd_byte *) &count[0]); 181 182 if (fwrite (&count[0], sizeof (count), 1, ofp) != 1) 183 { 184 perror (filename); 185 done (1); 186 } 187 } 188 } 189 190 191 /* Calculate scaled entry point addresses (to save time in 192 hist_assign_samples), and, on architectures that have procedure 193 entry masks at the start of a function, possibly push the scaled 194 entry points over the procedure entry mask, if it turns out that 195 the entry point is in one bin and the code for a routine is in the 196 next bin. */ 197 198 static void 199 scale_and_align_entries () 200 { 201 Sym *sym; 202 bfd_vma bin_of_entry; 203 bfd_vma bin_of_code; 204 205 for (sym = symtab.base; sym < symtab.limit; sym++) 206 { 207 sym->hist.scaled_addr = sym->addr / sizeof (UNIT); 208 bin_of_entry = (sym->hist.scaled_addr - lowpc) / hist_scale; 209 bin_of_code = ((sym->hist.scaled_addr + UNITS_TO_CODE - lowpc) 210 / hist_scale); 211 if (bin_of_entry < bin_of_code) 212 { 213 DBG (SAMPLEDEBUG, 214 printf ("[scale_and_align_entries] pushing 0x%lx to 0x%lx\n", 215 (unsigned long) sym->hist.scaled_addr, 216 (unsigned long) (sym->hist.scaled_addr 217 + UNITS_TO_CODE))); 218 sym->hist.scaled_addr += UNITS_TO_CODE; 219 } 220 } 221 } 222 223 224 /* Assign samples to the symbol to which they belong. 225 226 Histogram bin I covers some address range [BIN_LOWPC,BIN_HIGH_PC) 227 which may overlap one more symbol address ranges. If a symbol 228 overlaps with the bin's address range by O percent, then O percent 229 of the bin's count is credited to that symbol. 230 231 There are three cases as to where BIN_LOW_PC and BIN_HIGH_PC can be 232 with respect to the symbol's address range [SYM_LOW_PC, 233 SYM_HIGH_PC) as shown in the following diagram. OVERLAP computes 234 the distance (in UNITs) between the arrows, the fraction of the 235 sample that is to be credited to the symbol which starts at 236 SYM_LOW_PC. 237 238 sym_low_pc sym_high_pc 239 | | 240 v v 241 242 +-----------------------------------------------+ 243 | | 244 | ->| |<- ->| |<- ->| |<- | 245 | | | | | | 246 +---------+ +---------+ +---------+ 247 248 ^ ^ ^ ^ ^ ^ 249 | | | | | | 250 bin_low_pc bin_high_pc bin_low_pc bin_high_pc bin_low_pc bin_high_pc 251 252 For the VAX we assert that samples will never fall in the first two 253 bytes of any routine, since that is the entry mask, thus we call 254 scale_and_align_entries() to adjust the entry points if the entry 255 mask falls in one bin but the code for the routine doesn't start 256 until the next bin. In conjunction with the alignment of routine 257 addresses, this should allow us to have only one sample for every 258 four bytes of text space and never have any overlap (the two end 259 cases, above). */ 260 261 void 262 hist_assign_samples () 263 { 264 bfd_vma bin_low_pc, bin_high_pc; 265 bfd_vma sym_low_pc, sym_high_pc; 266 bfd_vma overlap, addr; 267 unsigned int bin_count; 268 unsigned int i, j; 269 double time, credit; 270 271 /* Read samples and assign to symbols. */ 272 hist_scale = highpc - lowpc; 273 hist_scale /= hist_num_bins; 274 scale_and_align_entries (); 275 276 /* Iterate over all sample bins. */ 277 for (i = 0, j = 1; i < hist_num_bins; ++i) 278 { 279 bin_count = hist_sample[i]; 280 if (! bin_count) 281 continue; 282 283 bin_low_pc = lowpc + (bfd_vma) (hist_scale * i); 284 bin_high_pc = lowpc + (bfd_vma) (hist_scale * (i + 1)); 285 time = bin_count; 286 287 DBG (SAMPLEDEBUG, 288 printf ( 289 "[assign_samples] bin_low_pc=0x%lx, bin_high_pc=0x%lx, bin_count=%u\n", 290 (unsigned long) (sizeof (UNIT) * bin_low_pc), 291 (unsigned long) (sizeof (UNIT) * bin_high_pc), 292 bin_count)); 293 total_time += time; 294 295 /* Credit all symbols that are covered by bin I. */ 296 for (j = j - 1; j < symtab.len; ++j) 297 { 298 sym_low_pc = symtab.base[j].hist.scaled_addr; 299 sym_high_pc = symtab.base[j + 1].hist.scaled_addr; 300 301 /* If high end of bin is below entry address, 302 go for next bin. */ 303 if (bin_high_pc < sym_low_pc) 304 break; 305 306 /* If low end of bin is above high end of symbol, 307 go for next symbol. */ 308 if (bin_low_pc >= sym_high_pc) 309 continue; 310 311 overlap = 312 MIN (bin_high_pc, sym_high_pc) - MAX (bin_low_pc, sym_low_pc); 313 if (overlap > 0) 314 { 315 DBG (SAMPLEDEBUG, 316 printf ( 317 "[assign_samples] [0x%lx,0x%lx) %s gets %f ticks %ld overlap\n", 318 (unsigned long) symtab.base[j].addr, 319 (unsigned long) (sizeof (UNIT) * sym_high_pc), 320 symtab.base[j].name, overlap * time / hist_scale, 321 (long) overlap)); 322 323 addr = symtab.base[j].addr; 324 credit = overlap * time / hist_scale; 325 326 /* Credit symbol if it appears in INCL_FLAT or that 327 table is empty and it does not appear it in 328 EXCL_FLAT. */ 329 if (sym_lookup (&syms[INCL_FLAT], addr) 330 || (syms[INCL_FLAT].len == 0 331 && !sym_lookup (&syms[EXCL_FLAT], addr))) 332 { 333 symtab.base[j].hist.time += credit; 334 } 335 else 336 { 337 total_time -= credit; 338 } 339 } 340 } 341 } 342 343 DBG (SAMPLEDEBUG, printf ("[assign_samples] total_time %f\n", 344 total_time)); 345 } 346 347 348 /* Print header for flag histogram profile. */ 349 350 static void 351 print_header (int prefix) 352 { 353 char unit[64]; 354 355 sprintf (unit, _("%c%c/call"), prefix, hist_dimension_abbrev); 356 357 if (bsd_style_output) 358 { 359 printf (_("\ngranularity: each sample hit covers %ld byte(s)"), 360 (long) hist_scale * sizeof (UNIT)); 361 if (total_time > 0.0) 362 { 363 printf (_(" for %.2f%% of %.2f %s\n\n"), 364 100.0 / total_time, total_time / hz, hist_dimension); 365 } 366 } 367 else 368 { 369 printf (_("\nEach sample counts as %g %s.\n"), 1.0 / hz, hist_dimension); 370 } 371 372 if (total_time <= 0.0) 373 { 374 printf (_(" no time accumulated\n\n")); 375 376 /* This doesn't hurt since all the numerators will be zero. */ 377 total_time = 1.0; 378 } 379 380 printf ("%5.5s %10.10s %8.8s %8.8s %8.8s %8.8s %-8.8s\n", 381 "% ", _("cumulative"), _("self "), "", _("self "), _("total "), 382 ""); 383 printf ("%5.5s %9.9s %8.8s %8.8s %8.8s %8.8s %-8.8s\n", 384 _("time"), hist_dimension, hist_dimension, _("calls"), unit, unit, 385 _("name")); 386 } 387 388 389 static void 390 print_line (Sym *sym, double scale) 391 { 392 if (ignore_zeros && sym->ncalls == 0 && sym->hist.time == 0) 393 return; 394 395 accum_time += sym->hist.time; 396 397 if (bsd_style_output) 398 printf ("%5.1f %10.2f %8.2f", 399 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0, 400 accum_time / hz, sym->hist.time / hz); 401 else 402 printf ("%6.2f %9.2f %8.2f", 403 total_time > 0.0 ? 100 * sym->hist.time / total_time : 0.0, 404 accum_time / hz, sym->hist.time / hz); 405 406 if (sym->ncalls != 0) 407 printf (" %8lu %8.2f %8.2f ", 408 sym->ncalls, scale * sym->hist.time / hz / sym->ncalls, 409 scale * (sym->hist.time + sym->cg.child_time) / hz / sym->ncalls); 410 else 411 printf (" %8.8s %8.8s %8.8s ", "", "", ""); 412 413 if (bsd_style_output) 414 print_name (sym); 415 else 416 print_name_only (sym); 417 418 printf ("\n"); 419 } 420 421 422 /* Compare LP and RP. The primary comparison key is execution time, 423 the secondary is number of invocation, and the tertiary is the 424 lexicographic order of the function names. */ 425 426 static int 427 cmp_time (const PTR lp, const PTR rp) 428 { 429 const Sym *left = *(const Sym **) lp; 430 const Sym *right = *(const Sym **) rp; 431 double time_diff; 432 433 time_diff = right->hist.time - left->hist.time; 434 435 if (time_diff > 0.0) 436 return 1; 437 438 if (time_diff < 0.0) 439 return -1; 440 441 if (right->ncalls > left->ncalls) 442 return 1; 443 444 if (right->ncalls < left->ncalls) 445 return -1; 446 447 return strcmp (left->name, right->name); 448 } 449 450 451 /* Print the flat histogram profile. */ 452 453 void 454 hist_print () 455 { 456 Sym **time_sorted_syms, *top_dog, *sym; 457 unsigned int index; 458 unsigned log_scale; 459 double top_time, time; 460 bfd_vma addr; 461 462 if (first_output) 463 first_output = FALSE; 464 else 465 printf ("\f\n"); 466 467 accum_time = 0.0; 468 469 if (bsd_style_output) 470 { 471 if (print_descriptions) 472 { 473 printf (_("\n\n\nflat profile:\n")); 474 flat_blurb (stdout); 475 } 476 } 477 else 478 { 479 printf (_("Flat profile:\n")); 480 } 481 482 /* Sort the symbol table by time (call-count and name as secondary 483 and tertiary keys). */ 484 time_sorted_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *)); 485 486 for (index = 0; index < symtab.len; ++index) 487 time_sorted_syms[index] = &symtab.base[index]; 488 489 qsort (time_sorted_syms, symtab.len, sizeof (Sym *), cmp_time); 490 491 if (bsd_style_output) 492 { 493 log_scale = 5; /* Milli-seconds is BSD-default. */ 494 } 495 else 496 { 497 /* Search for symbol with highest per-call 498 execution time and scale accordingly. */ 499 log_scale = 0; 500 top_dog = 0; 501 top_time = 0.0; 502 503 for (index = 0; index < symtab.len; ++index) 504 { 505 sym = time_sorted_syms[index]; 506 507 if (sym->ncalls != 0) 508 { 509 time = (sym->hist.time + sym->cg.child_time) / sym->ncalls; 510 511 if (time > top_time) 512 { 513 top_dog = sym; 514 top_time = time; 515 } 516 } 517 } 518 519 if (top_dog && top_dog->ncalls != 0 && top_time > 0.0) 520 { 521 top_time /= hz; 522 523 for (log_scale = 0; log_scale < ARRAY_SIZE (SItab); log_scale ++) 524 { 525 double scaled_value = SItab[log_scale].scale * top_time; 526 527 if (scaled_value >= 1.0 && scaled_value < 1000.0) 528 break; 529 } 530 } 531 } 532 533 /* For now, the dimension is always seconds. In the future, we 534 may also want to support other (pseudo-)dimensions (such as 535 I-cache misses etc.). */ 536 print_header (SItab[log_scale].prefix); 537 538 for (index = 0; index < symtab.len; ++index) 539 { 540 addr = time_sorted_syms[index]->addr; 541 542 /* Print symbol if its in INCL_FLAT table or that table 543 is empty and the symbol is not in EXCL_FLAT. */ 544 if (sym_lookup (&syms[INCL_FLAT], addr) 545 || (syms[INCL_FLAT].len == 0 546 && !sym_lookup (&syms[EXCL_FLAT], addr))) 547 print_line (time_sorted_syms[index], SItab[log_scale].scale); 548 } 549 550 free (time_sorted_syms); 551 552 if (print_descriptions && !bsd_style_output) 553 flat_blurb (stdout); 554 } 555