1 /* $OpenBSD: gprof.c,v 1.19 2009/10/27 23:59:38 deraadt Exp $ */ 2 /* $NetBSD: gprof.c,v 1.8 1995/04/19 07:15:59 cgd Exp $ */ 3 4 /* 5 * Copyright (c) 1983, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include "gprof.h" 34 35 int valcmp(const void *, const void *); 36 37 static struct gmonhdr gmonhdr; 38 extern char *__progname; 39 40 int 41 main(int argc, char *argv[]) 42 { 43 char **sp; 44 nltype **timesortnlp; 45 char **defaultEs; 46 47 --argc; 48 argv++; 49 debug = 0; 50 bflag = TRUE; 51 while ( *argv != 0 && **argv == '-' ) { 52 (*argv)++; 53 switch ( **argv ) { 54 case 'a': 55 aflag = TRUE; 56 break; 57 case 'b': 58 bflag = FALSE; 59 break; 60 case 'C': 61 Cflag = TRUE; 62 cyclethreshold = atoi( *++argv ); 63 break; 64 case 'c': 65 #if defined(__i386__) || defined(__vax__) || defined(__tahoe__) || \ 66 defined(__sparc__) || defined(__sparc64__) 67 cflag = TRUE; 68 #else 69 fprintf(stderr, "%s: -c isn't supported on this architecture yet\n", __progname); 70 exit(1); 71 #endif 72 break; 73 case 'd': 74 dflag = TRUE; 75 setlinebuf(stdout); 76 debug |= atoi( *++argv ); 77 debug |= ANYDEBUG; 78 # ifdef DEBUG 79 printf("[main] debug = %d\n", debug); 80 # else /* not DEBUG */ 81 warnx("-d ignored"); 82 # endif /* DEBUG */ 83 break; 84 case 'E': 85 ++argv; 86 addlist( Elist , *argv ); 87 Eflag = TRUE; 88 addlist( elist , *argv ); 89 eflag = TRUE; 90 break; 91 case 'e': 92 addlist( elist , *++argv ); 93 eflag = TRUE; 94 break; 95 case 'F': 96 ++argv; 97 addlist( Flist , *argv ); 98 Fflag = TRUE; 99 addlist( flist , *argv ); 100 fflag = TRUE; 101 break; 102 case 'f': 103 addlist( flist , *++argv ); 104 fflag = TRUE; 105 break; 106 case 'k': 107 addlist( kfromlist , *++argv ); 108 addlist( ktolist , *++argv ); 109 kflag = TRUE; 110 break; 111 case 's': 112 sflag = TRUE; 113 break; 114 case 'z': 115 zflag = TRUE; 116 break; 117 } 118 argv++; 119 } 120 if ( *argv != 0 ) { 121 a_outname = *argv; 122 argv++; 123 } else { 124 a_outname = A_OUTNAME; 125 } 126 if ( *argv != 0 ) { 127 gmonname = *argv; 128 argv++; 129 } else { 130 gmonname = GMONNAME; 131 } 132 /* 133 * get information about a.out file. 134 */ 135 if (getnfile(a_outname, &defaultEs) == -1) 136 errx(1, "%s: bad format", a_outname); 137 /* 138 * sort symbol table. 139 */ 140 qsort(nl, nname, sizeof(nltype), valcmp); 141 /* 142 * turn off default functions 143 */ 144 for ( sp = &defaultEs[0] ; *sp ; sp++ ) { 145 Eflag = TRUE; 146 addlist( Elist , *sp ); 147 eflag = TRUE; 148 addlist( elist , *sp ); 149 } 150 /* 151 * get information about mon.out file(s). 152 */ 153 do { 154 getpfile( gmonname ); 155 if ( *argv != 0 ) { 156 gmonname = *argv; 157 } 158 } while ( *argv++ != 0 ); 159 /* 160 * how many ticks per second? 161 * if we can't tell, report time in ticks. 162 */ 163 if (hz == 0) { 164 hz = 1; 165 warnx("time is in ticks, not seconds"); 166 } 167 /* 168 * dump out a gmon.sum file if requested 169 */ 170 if ( sflag ) { 171 dumpsum( GMONSUM ); 172 } 173 /* 174 * assign samples to procedures 175 */ 176 asgnsamples(); 177 /* 178 * assemble the dynamic profile 179 */ 180 timesortnlp = doarcs(); 181 /* 182 * print the dynamic profile 183 */ 184 printgprof( timesortnlp ); 185 /* 186 * print the flat profile 187 */ 188 printprof(); 189 /* 190 * print the index 191 */ 192 printindex(); 193 194 return (0); 195 } 196 197 /* 198 * information from a gmon.out file is in two parts: 199 * an array of sampling hits within pc ranges, 200 * and the arcs. 201 */ 202 void 203 getpfile(const char *filename) 204 { 205 FILE *pfile; 206 struct rawarc arc; 207 208 pfile = openpfile(filename); 209 readsamples(pfile); 210 /* 211 * the rest of the file consists of 212 * a bunch of <from,self,count> tuples. 213 */ 214 while ( fread( &arc , sizeof arc , 1 , pfile ) == 1 ) { 215 # ifdef DEBUG 216 if ( debug & SAMPLEDEBUG ) { 217 printf( "[getpfile] frompc 0x%lx selfpc 0x%lx count %ld\n" , 218 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 219 } 220 # endif /* DEBUG */ 221 /* 222 * add this arc 223 */ 224 tally( &arc ); 225 } 226 fclose(pfile); 227 } 228 229 FILE * 230 openpfile(const char *filename) 231 { 232 struct gmonhdr tmp; 233 FILE *pfile; 234 int size; 235 int rate; 236 237 if((pfile = fopen(filename, "r")) == NULL) 238 err(1, "fopen: %s", filename); 239 if (fread(&tmp, sizeof(struct gmonhdr), 1, pfile) != 1) 240 errx(1, "%s: bad gmon header", filename); 241 if ( s_highpc != 0 && ( tmp.lpc != gmonhdr.lpc || 242 tmp.hpc != gmonhdr.hpc || tmp.ncnt != gmonhdr.ncnt)) 243 errx(1, "%s: incompatible with first gmon file", filename); 244 gmonhdr = tmp; 245 if ( gmonhdr.version == GMONVERSION ) { 246 rate = gmonhdr.profrate; 247 size = sizeof(struct gmonhdr); 248 } else { 249 fseek(pfile, sizeof(struct ophdr), SEEK_SET); 250 size = sizeof(struct ophdr); 251 gmonhdr.profrate = rate = hertz(); 252 gmonhdr.version = GMONVERSION; 253 } 254 if (hz == 0) { 255 hz = rate; 256 } else if (hz != rate) 257 errx(1, "%s: profile clock rate (%d) incompatible with clock rate " 258 "(%ld) in first gmon file", filename, rate, hz); 259 s_lowpc = (unsigned long) gmonhdr.lpc; 260 s_highpc = (unsigned long) gmonhdr.hpc; 261 lowpc = (unsigned long)gmonhdr.lpc / sizeof(UNIT); 262 highpc = (unsigned long)gmonhdr.hpc / sizeof(UNIT); 263 sampbytes = gmonhdr.ncnt - size; 264 nsamples = sampbytes / sizeof (UNIT); 265 # ifdef DEBUG 266 if ( debug & SAMPLEDEBUG ) { 267 printf( "[openpfile] hdr.lpc 0x%lx hdr.hpc 0x%lx hdr.ncnt %d\n", 268 gmonhdr.lpc , gmonhdr.hpc , gmonhdr.ncnt ); 269 printf( "[openpfile] s_lowpc 0x%lx s_highpc 0x%lx\n" , 270 s_lowpc , s_highpc ); 271 printf( "[openpfile] lowpc 0x%lx highpc 0x%lx\n" , 272 lowpc , highpc ); 273 printf( "[openpfile] sampbytes %d nsamples %d\n" , 274 sampbytes , nsamples ); 275 printf( "[openpfile] sample rate %ld\n" , hz ); 276 } 277 # endif /* DEBUG */ 278 return(pfile); 279 } 280 281 void 282 tally(struct rawarc *rawp) 283 { 284 nltype *parentp; 285 nltype *childp; 286 287 parentp = nllookup( rawp -> raw_frompc ); 288 childp = nllookup( rawp -> raw_selfpc ); 289 if ( parentp == 0 || childp == 0 ) 290 return; 291 if ( kflag 292 && onlist( kfromlist , parentp -> name ) 293 && onlist( ktolist , childp -> name ) ) { 294 return; 295 } 296 childp -> ncall += rawp -> raw_count; 297 # ifdef DEBUG 298 if ( debug & TALLYDEBUG ) { 299 printf( "[tally] arc from %s to %s traversed %ld times\n" , 300 parentp -> name , childp -> name , rawp -> raw_count ); 301 } 302 # endif /* DEBUG */ 303 addarc( parentp , childp , rawp -> raw_count ); 304 } 305 306 /* 307 * dump out the gmon.sum file 308 */ 309 void 310 dumpsum(const char *sumfile) 311 { 312 nltype *nlp; 313 arctype *arcp; 314 struct rawarc arc; 315 FILE *sfile; 316 317 if ( ( sfile = fopen ( sumfile , "w" ) ) == NULL ) 318 err(1, "fopen: %s", sumfile); 319 /* 320 * dump the header; use the last header read in 321 */ 322 if ( fwrite( &gmonhdr , sizeof gmonhdr , 1 , sfile ) != 1 ) 323 err(1, "fwrite: %s", sumfile); 324 /* 325 * dump the samples 326 */ 327 if (fwrite(samples, sizeof (UNIT), nsamples, sfile) != nsamples) 328 err(1, "fwrite: %s", sumfile); 329 /* 330 * dump the normalized raw arc information 331 */ 332 for ( nlp = nl ; nlp < npe ; nlp++ ) { 333 for ( arcp = nlp -> children ; arcp ; arcp = arcp -> arc_childlist ) { 334 arc.raw_frompc = arcp -> arc_parentp -> value; 335 arc.raw_selfpc = arcp -> arc_childp -> value; 336 arc.raw_count = arcp -> arc_count; 337 if (fwrite ( &arc , sizeof arc , 1 , sfile ) != 1) 338 err(1, "fwrite: %s", sumfile); 339 # ifdef DEBUG 340 if ( debug & SAMPLEDEBUG ) { 341 printf( "[dumpsum] frompc 0x%lx selfpc 0x%lx count %ld\n" , 342 arc.raw_frompc , arc.raw_selfpc , arc.raw_count ); 343 } 344 # endif /* DEBUG */ 345 } 346 } 347 fclose( sfile ); 348 } 349 350 int 351 valcmp(const void *vp1, const void *vp2) 352 { 353 const nltype *p1 = vp1; 354 const nltype *p2 = vp2; 355 356 if ( p1 -> value < p2 -> value ) { 357 return LESSTHAN; 358 } 359 if ( p1 -> value > p2 -> value ) { 360 return GREATERTHAN; 361 } 362 return EQUALTO; 363 } 364 365 void 366 readsamples(FILE *pfile) 367 { 368 UNIT sample; 369 int i; 370 371 if (samples == 0) { 372 samples = (UNIT *) calloc(sampbytes, sizeof (UNIT)); 373 if (samples == 0) 374 errx(1, "No room for %ld sample pc's", sampbytes / sizeof (UNIT)); 375 } 376 for (i = 0; i < nsamples; i++) { 377 fread(&sample, sizeof (UNIT), 1, pfile); 378 if (feof(pfile)) 379 break; 380 samples[i] += sample; 381 } 382 if (i != nsamples) 383 errx(1, "unexpected EOF after reading %d/%d samples", i, nsamples ); 384 } 385 386 /* 387 * Assign samples to the procedures to which they belong. 388 * 389 * There are three cases as to where pcl and pch can be 390 * with respect to the routine entry addresses svalue0 and svalue1 391 * as shown in the following diagram. overlap computes the 392 * distance between the arrows, the fraction of the sample 393 * that is to be credited to the routine which starts at svalue0. 394 * 395 * svalue0 svalue1 396 * | | 397 * v v 398 * 399 * +-----------------------------------------------+ 400 * | | 401 * | ->| |<- ->| |<- ->| |<- | 402 * | | | | | | 403 * +---------+ +---------+ +---------+ 404 * 405 * ^ ^ ^ ^ ^ ^ 406 * | | | | | | 407 * pcl pch pcl pch pcl pch 408 * 409 * For the vax we assert that samples will never fall in the first 410 * two bytes of any routine, since that is the entry mask, 411 * thus we give call alignentries() to adjust the entry points if 412 * the entry mask falls in one bucket but the code for the routine 413 * doesn't start until the next bucket. In conjunction with the 414 * alignment of routine addresses, this should allow us to have 415 * only one sample for every four bytes of text space and never 416 * have any overlap (the two end cases, above). 417 */ 418 void 419 asgnsamples(void) 420 { 421 int j; 422 UNIT ccnt; 423 double time; 424 unsigned long pcl, pch; 425 unsigned long i; 426 unsigned long overlap; 427 unsigned long svalue0, svalue1; 428 429 /* read samples and assign to namelist symbols */ 430 scale = highpc - lowpc; 431 scale /= nsamples; 432 alignentries(); 433 for (i = 0, j = 1; i < nsamples; i++) { 434 ccnt = samples[i]; 435 if (ccnt == 0) 436 continue; 437 pcl = lowpc + (unsigned long)(scale * i); 438 pch = lowpc + (unsigned long)(scale * (i + 1)); 439 time = ccnt; 440 # ifdef DEBUG 441 if ( debug & SAMPLEDEBUG ) { 442 printf( "[asgnsamples] pcl 0x%lx pch 0x%lx ccnt %d\n" , 443 pcl , pch , ccnt ); 444 } 445 # endif /* DEBUG */ 446 totime += time; 447 for (j = j - 1; j < nname; j++) { 448 svalue0 = nl[j].svalue; 449 svalue1 = nl[j+1].svalue; 450 /* 451 * if high end of tick is below entry address, 452 * go for next tick. 453 */ 454 if (pch < svalue0) 455 break; 456 /* 457 * if low end of tick into next routine, 458 * go for next routine. 459 */ 460 if (pcl >= svalue1) 461 continue; 462 overlap = min(pch, svalue1) - max(pcl, svalue0); 463 if (overlap > 0) { 464 # ifdef DEBUG 465 if (debug & SAMPLEDEBUG) { 466 printf("[asgnsamples] (0x%lx->0x%lx-0x%lx) %s gets %f ticks %ld overlap\n", 467 nl[j].value/sizeof(UNIT), svalue0, svalue1, 468 nl[j].name, 469 overlap * time / scale, overlap); 470 } 471 # endif /* DEBUG */ 472 nl[j].time += overlap * time / scale; 473 } 474 } 475 } 476 # ifdef DEBUG 477 if (debug & SAMPLEDEBUG) { 478 printf("[asgnsamples] totime %f\n", totime); 479 } 480 # endif /* DEBUG */ 481 } 482 483 484 unsigned long 485 min(unsigned long a, unsigned long b) 486 { 487 if (a<b) 488 return(a); 489 return(b); 490 } 491 492 unsigned long 493 max(unsigned long a, unsigned long b) 494 { 495 if (a>b) 496 return(a); 497 return(b); 498 } 499 500 /* 501 * calculate scaled entry point addresses (to save time in asgnsamples), 502 * and possibly push the scaled entry points over the entry mask, 503 * if it turns out that the entry point is in one bucket and the code 504 * for a routine is in the next bucket. 505 */ 506 void 507 alignentries(void) 508 { 509 struct nl *nlp; 510 unsigned long bucket_of_entry; 511 unsigned long bucket_of_code; 512 513 for (nlp = nl; nlp < npe; nlp++) { 514 nlp -> svalue = nlp -> value / sizeof(UNIT); 515 bucket_of_entry = (nlp->svalue - lowpc) / scale; 516 bucket_of_code = (nlp->svalue + UNITS_TO_CODE - lowpc) / scale; 517 if (bucket_of_entry < bucket_of_code) { 518 # ifdef DEBUG 519 if (debug & SAMPLEDEBUG) { 520 printf("[alignentries] pushing svalue 0x%lx to 0x%lx\n", 521 nlp->svalue, nlp->svalue + UNITS_TO_CODE); 522 } 523 # endif /* DEBUG */ 524 nlp->svalue += UNITS_TO_CODE; 525 } 526 } 527 } 528