1 /* $NetBSD: main.c,v 1.4 2006/09/10 21:09:48 wiz Exp $ */ 2 3 /*- 4 * Copyright (c) 2006 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * TODO: 41 * 42 * - Need better analysis and tracking of events. 43 * - Should be binary format agnostic, but given that we're likely to be using 44 * ELF for quite a while that's not a big problem. 45 * - Shouldn't have to parse the namelist here. We should use something like 46 * FreeBSD's libelf. 47 * - The way the namelist is searched sucks, is it worth doing something 48 * better? 49 * - Might be nice to record events and replay later, like ktrace/kdump. 50 */ 51 52 #include <sys/cdefs.h> 53 #ifndef lint 54 __RCSID("$NetBSD: main.c,v 1.4 2006/09/10 21:09:48 wiz Exp $"); 55 #endif /* not lint */ 56 57 #include <sys/types.h> 58 #include <sys/param.h> 59 #include <sys/time.h> 60 #include <sys/fcntl.h> 61 #include <sys/ioctl.h> 62 #include <sys/wait.h> 63 #include <sys/signal.h> 64 #include <sys/sysctl.h> 65 66 #include <dev/lockstat.h> 67 68 #include <stdio.h> 69 #include <stdlib.h> 70 #include <string.h> 71 #include <limits.h> 72 #include <unistd.h> 73 #include <err.h> 74 #include <paths.h> 75 #include <util.h> 76 #include <ctype.h> 77 #include <errno.h> 78 79 #include "extern.h" 80 81 #define _PATH_DEV_LOCKSTAT "/dev/lockstat" 82 83 #define MILLI 1000.0 84 #define MICRO 1000000.0 85 #define NANO 1000000000.0 86 #define PICO 1000000000000.0 87 88 TAILQ_HEAD(lock_head, lockstruct); 89 typedef struct lock_head locklist_t; 90 TAILQ_HEAD(buf_head, lsbuf); 91 typedef struct buf_head buflist_t; 92 93 typedef struct lockstruct { 94 TAILQ_ENTRY(lockstruct) chain; 95 buflist_t bufs; 96 uintptr_t lock; 97 double times[LB_NEVENT]; 98 uint32_t counts[LB_NEVENT]; 99 u_int flags; 100 } lock_t; 101 102 typedef struct name { 103 const char *name; 104 int mask; 105 } name_t; 106 107 const name_t locknames[] = { 108 { "adaptive_mutex", LB_ADAPTIVE_MUTEX }, 109 { "adaptive_rwlock", LB_ADAPTIVE_RWLOCK }, 110 { "spin_mutex", LB_SPIN_MUTEX }, 111 { "spin_rwlock", LB_SPIN_RWLOCK }, 112 { "lockmgr", LB_LOCKMGR }, 113 { NULL, 0 } 114 }; 115 116 const name_t eventnames[] = { 117 { "spin", LB_SPIN }, 118 { "sleep", LB_SLEEP }, 119 { NULL, 0 }, 120 }; 121 122 const name_t alltypes[] = { 123 { "Adaptive mutex spin", LB_ADAPTIVE_MUTEX | LB_SPIN }, 124 { "Adaptive mutex sleep", LB_ADAPTIVE_MUTEX | LB_SLEEP }, 125 { "Adaptive RW lock spin", LB_ADAPTIVE_RWLOCK | LB_SPIN }, 126 { "Adaptive RW lock sleep", LB_ADAPTIVE_RWLOCK | LB_SLEEP }, 127 { "Spin mutex spin", LB_SPIN_MUTEX | LB_SPIN }, 128 { "Spin RW lock spin", LB_SPIN_RWLOCK | LB_SPIN }, 129 { "lockmgr sleep", LB_LOCKMGR | LB_SLEEP }, 130 { NULL, 0 } 131 }; 132 133 locklist_t locklist[LB_NLOCK]; 134 135 lsbuf_t *bufs; 136 lsdisable_t ld; 137 int lflag; 138 int nbufs; 139 int cflag; 140 int lsfd; 141 int displayed; 142 int bin64; 143 double tscale; 144 double cscale; 145 double cpuscale[sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0])]; 146 FILE *outfp; 147 148 void findsym(findsym_t, char *, uintptr_t *, uintptr_t *); 149 void spawn(int, char **); 150 void display(int, const char *name); 151 void listnames(const name_t *); 152 int matchname(const name_t *, const char *); 153 void makelists(void); 154 void nullsig(int); 155 void usage(void); 156 void resort(int, int); 157 int ncpu(void); 158 159 int 160 main(int argc, char **argv) 161 { 162 int eventtype, locktype, ch, nlfd, sflag, fd, i, pflag; 163 const char *nlistf, *outf; 164 char *lockname, *funcname; 165 const name_t *name; 166 lsenable_t le; 167 double ms; 168 char *p; 169 170 nlistf = NULL; 171 outf = NULL; 172 lockname = NULL; 173 funcname = NULL; 174 eventtype = -1; 175 locktype = -1; 176 nbufs = 0; 177 sflag = 0; 178 pflag = 0; 179 180 while ((ch = getopt(argc, argv, "E:F:L:M:N:T:b:ceflo:pst")) != -1) 181 switch (ch) { 182 case 'E': 183 eventtype = matchname(eventnames, optarg); 184 break; 185 case 'F': 186 funcname = optarg; 187 break; 188 case 'L': 189 lockname = optarg; 190 break; 191 case 'N': 192 nlistf = optarg; 193 break; 194 case 'T': 195 locktype = matchname(locknames, optarg); 196 break; 197 case 'b': 198 nbufs = (int)strtol(optarg, &p, 0); 199 if (!isdigit((u_int)*optarg) || *p != '\0') 200 usage(); 201 break; 202 case 'c': 203 cflag = 1; 204 break; 205 case 'e': 206 listnames(eventnames); 207 break; 208 case 'l': 209 lflag = 1; 210 break; 211 case 'o': 212 outf = optarg; 213 break; 214 case 'p': 215 pflag = 1; 216 break; 217 case 's': 218 sflag = 1; 219 break; 220 case 't': 221 listnames(locknames); 222 break; 223 default: 224 usage(); 225 } 226 argc -= optind; 227 argv += optind; 228 229 if (*argv == NULL) 230 usage(); 231 232 if (outf) { 233 if ((fd = open(outf, O_WRONLY | O_CREAT, 0600)) == -1) 234 err(EXIT_FAILURE, "opening %s", outf); 235 outfp = fdopen(fd, "w"); 236 } else 237 outfp = stdout; 238 239 /* 240 * Find the name list for resolving symbol names, and load it into 241 * memory. 242 */ 243 if (nlistf == NULL) { 244 nlfd = open(_PATH_KSYMS, O_RDONLY); 245 nlistf = getbootfile(); 246 } else 247 nlfd = -1; 248 if (nlfd == -1) { 249 if ((nlfd = open(nlistf, O_RDONLY)) < 0) 250 err(EXIT_FAILURE, "cannot open " _PATH_KSYMS " or %s", 251 nlistf); 252 } 253 if (loadsym32(nlfd) != 0) { 254 if (loadsym64(nlfd) != 0) 255 errx(EXIT_FAILURE, "unable to load symbol table"); 256 bin64 = 1; 257 } 258 close(nlfd); 259 260 memset(&le, 0, sizeof(le)); 261 le.le_nbufs = nbufs; 262 263 /* 264 * Set up initial filtering. 265 */ 266 if (lockname != NULL) { 267 findsym(LOCK_BYNAME, lockname, &le.le_lock, NULL); 268 le.le_flags |= LE_ONE_LOCK; 269 } 270 if (!lflag) 271 le.le_flags |= LE_CALLSITE; 272 if (funcname != NULL) { 273 if (lflag) 274 usage(); 275 findsym(FUNC_BYNAME, funcname, &le.le_csstart, &le.le_csend); 276 le.le_flags |= LE_ONE_CALLSITE; 277 } 278 le.le_mask = (eventtype & LB_EVENT_MASK) | (locktype & LB_LOCK_MASK); 279 280 /* 281 * Start tracing. 282 */ 283 if ((lsfd = open(_PATH_DEV_LOCKSTAT, O_RDONLY)) < 0) 284 err(EXIT_FAILURE, "cannot open " _PATH_DEV_LOCKSTAT); 285 if (ioctl(lsfd, IOC_LOCKSTAT_GVERSION, &ch) < 0) 286 err(EXIT_FAILURE, "ioctl"); 287 if (ch != LS_VERSION) 288 errx(EXIT_FAILURE, "incompatible lockstat interface version"); 289 if (ioctl(lsfd, IOC_LOCKSTAT_ENABLE, &le)) 290 err(EXIT_FAILURE, "cannot enable tracing"); 291 292 /* 293 * Execute the traced program. 294 */ 295 spawn(argc, argv); 296 297 /* 298 * Stop tracing, and read the trace buffers from the kernel. 299 */ 300 if (ioctl(lsfd, IOC_LOCKSTAT_DISABLE, &ld) == -1) { 301 if (errno == EOVERFLOW) { 302 warnx("overflowed available kernel trace buffers"); 303 exit(EXIT_FAILURE); 304 } 305 err(EXIT_FAILURE, "cannot disable tracing"); 306 } 307 if ((bufs = malloc(ld.ld_size)) == NULL) 308 err(EXIT_FAILURE, "cannot allocate memory for user buffers"); 309 if (read(lsfd, bufs, ld.ld_size) != ld.ld_size) 310 err(EXIT_FAILURE, "reading from " _PATH_DEV_LOCKSTAT); 311 if (close(lsfd)) 312 err(EXIT_FAILURE, "close(" _PATH_DEV_LOCKSTAT ")"); 313 314 /* 315 * Figure out how to scale the results, and build the lists. For 316 * internal use we convert all times from CPU frequency based to 317 * picoseconds, and values are eventually displayed in ms. 318 */ 319 for (i = 0; i < sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0]); i++) 320 if (ld.ld_freq[i] != 0) 321 cpuscale[i] = PICO / ld.ld_freq[i]; 322 ms = ld.ld_time.tv_sec * MILLI + ld.ld_time.tv_nsec / MICRO; 323 if (pflag) 324 cscale = 1.0 / ncpu(); 325 else 326 cscale = 1.0; 327 cscale *= (sflag ? MILLI / ms : 1.0); 328 tscale = cscale / NANO; 329 nbufs = (int)(ld.ld_size / sizeof(lsbuf_t)); 330 makelists(); 331 332 /* 333 * Display the results. 334 */ 335 fprintf(outfp, "Elapsed time: %.2f seconds.", ms / MILLI); 336 if (sflag || pflag) { 337 fprintf(outfp, " Displaying "); 338 if (pflag) 339 fprintf(outfp, "per-CPU "); 340 if (sflag) 341 fprintf(outfp, "per-second "); 342 fprintf(outfp, "averages."); 343 } 344 putc('\n', outfp); 345 346 for (name = alltypes; name->name != NULL; name++) { 347 if (eventtype != -1 && 348 (name->mask & LB_EVENT_MASK) != eventtype) 349 continue; 350 if (locktype != -1 && 351 (name->mask & LB_LOCK_MASK) != locktype) 352 continue; 353 354 display(name->mask, name->name); 355 } 356 357 if (displayed == 0) 358 fprintf(outfp, "None of the selected events were recorded.\n"); 359 exit(EXIT_SUCCESS); 360 } 361 362 void 363 usage(void) 364 { 365 366 fprintf(stderr, 367 "%s: usage:\n" 368 "%s [options] <command>\n\n" 369 "-b nbuf\t\tset number of event buffers to allocate\n" 370 "-c\t\treport percentage of total events by count, not time\n" 371 "-E evt\t\tdisplay only one type of event\n" 372 "-e\t\tlist event types\n" 373 "-F func\t\tlimit trace to one function\n" 374 "-L lock\t\tlimit trace to one lock (name, or address)\n" 375 "-l\t\ttrace only by lock\n" 376 "-N nlist\tspecify name list file\n" 377 "-o file\t\tsend output to named file, not stdout\n" 378 "-p\t\tshow average count/time per CPU, not total\n" 379 "-s\t\tshow average count/time per second, not total\n" 380 "-T type\t\tdisplay only one type of lock\n" 381 "-t\t\tlist lock types\n", 382 getprogname(), getprogname()); 383 384 exit(EXIT_FAILURE); 385 } 386 387 void 388 nullsig(int junk) 389 { 390 391 (void)junk; 392 } 393 394 void 395 listnames(const name_t *name) 396 { 397 398 for (; name->name != NULL; name++) 399 printf("%s\n", name->name); 400 401 exit(EXIT_SUCCESS); 402 } 403 404 int 405 matchname(const name_t *name, const char *string) 406 { 407 408 for (; name->name != NULL; name++) 409 if (strcasecmp(name->name, string) == 0) 410 return name->mask; 411 412 warnx("unknown type `%s'", string); 413 usage(); 414 return 0; 415 } 416 417 /* 418 * Return the number of CPUs in the running system. 419 */ 420 int 421 ncpu(void) 422 { 423 int rv, mib[2]; 424 size_t varlen; 425 426 mib[0] = CTL_HW; 427 mib[1] = HW_NCPU; 428 varlen = sizeof(rv); 429 if (sysctl(mib, 2, &rv, &varlen, NULL, (size_t)0) < 0) 430 rv = 1; 431 432 return (rv); 433 } 434 435 /* 436 * Call into the ELF parser and look up a symbol by name or by address. 437 */ 438 void 439 findsym(findsym_t find, char *name, uintptr_t *start, uintptr_t *end) 440 { 441 uintptr_t tend; 442 char *p; 443 int rv; 444 445 if (end == NULL) 446 end = &tend; 447 448 if (find == LOCK_BYNAME) { 449 if (isdigit((u_int)name[0])) { 450 *start = (uintptr_t)strtoul(name, &p, 0); 451 if (*p == '\0') 452 return; 453 } 454 } 455 456 if (bin64) 457 rv = findsym64(find, name, start, end); 458 else 459 rv = findsym32(find, name, start, end); 460 461 if (find == FUNC_BYNAME || find == LOCK_BYNAME) { 462 if (rv == -1) 463 errx(EXIT_FAILURE, "unable to find symbol `%s'", name); 464 return; 465 } 466 467 if (rv == -1) 468 sprintf(name, "0x%016lx", (long)*start); 469 } 470 471 /* 472 * Fork off the child process and wait for it to complete. We trap SIGINT 473 * so that the caller can use Ctrl-C to stop tracing early and still get 474 * useful results. 475 */ 476 void 477 spawn(int argc, char **argv) 478 { 479 pid_t pid; 480 481 switch (pid = fork()) { 482 case 0: 483 close(lsfd); 484 if (execvp(argv[0], argv) == -1) 485 err(EXIT_FAILURE, "cannot exec"); 486 break; 487 case -1: 488 err(EXIT_FAILURE, "cannot fork to exec"); 489 break; 490 default: 491 signal(SIGINT, nullsig); 492 wait(NULL); 493 signal(SIGINT, SIG_DFL); 494 break; 495 } 496 } 497 498 /* 499 * From the kernel supplied data, construct two dimensional lists of locks 500 * and event buffers, indexed by lock type. 501 */ 502 void 503 makelists(void) 504 { 505 lsbuf_t *lb, *lb2, *max; 506 int i, type; 507 lock_t *l; 508 509 for (i = 0; i < LB_NLOCK; i++) 510 TAILQ_INIT(&locklist[i]); 511 512 for (lb = bufs, max = bufs + nbufs; lb < max; lb++) { 513 if (lb->lb_flags == 0) 514 continue; 515 516 /* 517 * Look for a record descibing this lock, and allocate a 518 * new one if needed. 519 */ 520 type = ((lb->lb_flags & LB_LOCK_MASK) >> LB_LOCK_SHIFT) - 1; 521 TAILQ_FOREACH(l, &locklist[type], chain) { 522 if (l->lock == lb->lb_lock) 523 break; 524 } 525 if (l == NULL) { 526 l = (lock_t *)malloc(sizeof(*l)); 527 l->flags = lb->lb_flags; 528 l->lock = lb->lb_lock; 529 memset(&l->counts, 0, sizeof(l->counts)); 530 memset(&l->times, 0, sizeof(l->times)); 531 TAILQ_INIT(&l->bufs); 532 TAILQ_INSERT_TAIL(&locklist[type], l, chain); 533 } 534 535 /* 536 * Scale the time values per buffer and summarise 537 * times+counts per lock. 538 */ 539 for (i = 0; i < LB_NEVENT; i++) { 540 lb->lb_times[i] *= cpuscale[lb->lb_cpu]; 541 l->counts[i] += lb->lb_counts[i]; 542 l->times[i] += lb->lb_times[i]; 543 } 544 545 /* 546 * Merge same lock+callsite pairs from multiple CPUs 547 * together. 548 */ 549 TAILQ_FOREACH(lb2, &l->bufs, lb_chain.tailq) { 550 if (lb->lb_callsite == lb2->lb_callsite) 551 break; 552 } 553 if (lb2 != NULL) { 554 for (i = 0; i < LB_NEVENT; i++) { 555 lb2->lb_counts[i] += lb->lb_counts[i]; 556 lb2->lb_times[i] += lb->lb_times[i]; 557 } 558 } else 559 TAILQ_INSERT_HEAD(&l->bufs, lb, lb_chain.tailq); 560 } 561 } 562 563 /* 564 * Re-sort one list of locks / lock buffers by event type. 565 */ 566 void 567 resort(int type, int event) 568 { 569 lsbuf_t *lb, *lb2; 570 locklist_t llist; 571 buflist_t blist; 572 lock_t *l, *l2; 573 574 TAILQ_INIT(&llist); 575 while ((l = TAILQ_FIRST(&locklist[type])) != NULL) { 576 TAILQ_REMOVE(&locklist[type], l, chain); 577 578 /* 579 * Sort the buffers into the per-lock list. 580 */ 581 TAILQ_INIT(&blist); 582 while ((lb = TAILQ_FIRST(&l->bufs)) != NULL) { 583 TAILQ_REMOVE(&l->bufs, lb, lb_chain.tailq); 584 585 lb2 = TAILQ_FIRST(&blist); 586 while (lb2 != NULL) { 587 if (cflag) { 588 if (lb->lb_counts[event] > 589 lb2->lb_counts[event]) 590 break; 591 } else if (lb->lb_times[event] > 592 lb2->lb_times[event]) 593 break; 594 lb2 = TAILQ_NEXT(lb2, lb_chain.tailq); 595 } 596 if (lb2 == NULL) 597 TAILQ_INSERT_TAIL(&blist, lb, lb_chain.tailq); 598 else 599 TAILQ_INSERT_BEFORE(lb2, lb, lb_chain.tailq); 600 } 601 l->bufs = blist; 602 603 /* 604 * Sort this lock into the per-type list, based on the 605 * totals per lock. 606 */ 607 l2 = TAILQ_FIRST(&llist); 608 while (l2 != NULL) { 609 if (cflag) { 610 if (l->counts[event] > l2->counts[event]) 611 break; 612 } else if (l->times[event] > l2->times[event]) 613 break; 614 l2 = TAILQ_NEXT(l2, chain); 615 } 616 if (l2 == NULL) 617 TAILQ_INSERT_TAIL(&llist, l, chain); 618 else 619 TAILQ_INSERT_BEFORE(l2, l, chain); 620 } 621 locklist[type] = llist; 622 } 623 624 /* 625 * Display a summary table for one lock type / event type pair. 626 */ 627 void 628 display(int mask, const char *name) 629 { 630 lock_t *l; 631 lsbuf_t *lb; 632 int event, type; 633 double pcscale, metric; 634 char lname[256], fname[256]; 635 636 type = ((mask & LB_LOCK_MASK) >> LB_LOCK_SHIFT) - 1; 637 if (TAILQ_FIRST(&locklist[type]) == NULL) 638 return; 639 640 event = (mask & LB_EVENT_MASK) - 1; 641 resort(type, event); 642 643 fprintf(outfp, "\n-- %s\n\n" 644 "Total%% Count Time/ms Lock Caller\n" 645 "------ ------- --------- ------------------ ----------------------------------\n", 646 name); 647 648 /* 649 * Sum up all events for this type of lock + event. 650 */ 651 pcscale = 0; 652 TAILQ_FOREACH(l, &locklist[type], chain) { 653 if (cflag) 654 pcscale += l->counts[event]; 655 else 656 pcscale += l->times[event]; 657 displayed++; 658 } 659 if (pcscale == 0) 660 pcscale = 100; 661 else 662 pcscale = (100.0 / pcscale); 663 664 /* 665 * For each lock, print a summary total, followed by a breakdown by 666 * caller. 667 */ 668 TAILQ_FOREACH(l, &locklist[type], chain) { 669 if (cflag) 670 metric = l->counts[event]; 671 else 672 metric = l->times[event]; 673 metric *= pcscale; 674 675 findsym(LOCK_BYADDR, lname, &l->lock, NULL); 676 677 fprintf(outfp, "%6.2f %7d %9.2f %-18s <all>\n", metric, 678 (int)(l->counts[event] * cscale), 679 l->times[event] * tscale, lname); 680 681 if (lflag) 682 continue; 683 684 TAILQ_FOREACH(lb, &l->bufs, lb_chain.tailq) { 685 if (cflag) 686 metric = lb->lb_counts[event]; 687 else 688 metric = lb->lb_times[event]; 689 metric *= pcscale; 690 691 findsym(LOCK_BYADDR, lname, &lb->lb_lock, NULL); 692 findsym(FUNC_BYADDR, fname, &lb->lb_callsite, NULL); 693 fprintf(outfp, "%6.2f %7d %9.2f %-18s %s\n", metric, 694 (int)(lb->lb_counts[event] * cscale), 695 lb->lb_times[event] * tscale, 696 lname, fname); 697 } 698 } 699 } 700