1 /* $NetBSD: main.c,v 1.6 2006/11/13 19:08:52 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2006 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * TODO: 41 * 42 * - Need better analysis and tracking of events. 43 * - Should be binary format agnostic, but given that we're likely to be using 44 * ELF for quite a while that's not a big problem. 45 * - Shouldn't have to parse the namelist here. We should use something like 46 * FreeBSD's libelf. 47 * - The way the namelist is searched sucks, is it worth doing something 48 * better? 49 * - Might be nice to record events and replay later, like ktrace/kdump. 50 */ 51 52 #include <sys/cdefs.h> 53 #ifndef lint 54 __RCSID("$NetBSD: main.c,v 1.6 2006/11/13 19:08:52 ad Exp $"); 55 #endif /* not lint */ 56 57 #include <sys/types.h> 58 #include <sys/param.h> 59 #include <sys/time.h> 60 #include <sys/fcntl.h> 61 #include <sys/ioctl.h> 62 #include <sys/wait.h> 63 #include <sys/signal.h> 64 #include <sys/sysctl.h> 65 66 #include <dev/lockstat.h> 67 68 #include <stdio.h> 69 #include <stdlib.h> 70 #include <string.h> 71 #include <limits.h> 72 #include <unistd.h> 73 #include <err.h> 74 #include <paths.h> 75 #include <util.h> 76 #include <ctype.h> 77 #include <errno.h> 78 79 #include "extern.h" 80 81 #define _PATH_DEV_LOCKSTAT "/dev/lockstat" 82 83 #define MILLI 1000.0 84 #define MICRO 1000000.0 85 #define NANO 1000000000.0 86 #define PICO 1000000000000.0 87 88 TAILQ_HEAD(lock_head, lockstruct); 89 typedef struct lock_head locklist_t; 90 TAILQ_HEAD(buf_head, lsbuf); 91 typedef struct buf_head buflist_t; 92 93 typedef struct lockstruct { 94 TAILQ_ENTRY(lockstruct) chain; 95 buflist_t bufs; 96 uintptr_t lock; 97 double times[LB_NEVENT]; 98 uint32_t counts[LB_NEVENT]; 99 u_int flags; 100 u_int nbufs; 101 } lock_t; 102 103 typedef struct name { 104 const char *name; 105 int mask; 106 } name_t; 107 108 const name_t locknames[] = { 109 { "adaptive_mutex", LB_ADAPTIVE_MUTEX }, 110 { "spin_mutex", LB_SPIN_MUTEX }, 111 { "rwlock", LB_ADAPTIVE_RWLOCK }, 112 { "lockmgr", LB_LOCKMGR }, 113 #ifdef LB_KERNEL_LOCK 114 /* XXX newlock2 */ 115 { "kernel_lock", LB_KERNEL_LOCK }, 116 #endif 117 { NULL, 0 } 118 }; 119 120 const name_t eventnames[] = { 121 { "spin", LB_SPIN }, 122 { "sleep", LB_SLEEP }, 123 { NULL, 0 }, 124 }; 125 126 const name_t alltypes[] = { 127 { "Adaptive mutex spin", LB_ADAPTIVE_MUTEX | LB_SPIN }, 128 { "Adaptive mutex sleep", LB_ADAPTIVE_MUTEX | LB_SLEEP }, 129 { "Spin mutex spin", LB_SPIN_MUTEX | LB_SPIN }, 130 { "RW lock sleep", LB_ADAPTIVE_RWLOCK | LB_SLEEP }, 131 { "lockmgr sleep", LB_LOCKMGR | LB_SLEEP }, 132 #ifdef LB_KERNEL_LOCK 133 /* XXX newlock2 */ 134 { "Kernel lock spin", LB_KERNEL_LOCK | LB_SPIN }, 135 #endif 136 { NULL, 0 } 137 }; 138 139 locklist_t locklist[LB_NLOCK >> LB_LOCK_SHIFT]; 140 141 lsbuf_t *bufs; 142 lsdisable_t ld; 143 int lflag; 144 int nbufs; 145 int cflag; 146 int lsfd; 147 int displayed; 148 int bin64; 149 double tscale; 150 double cscale; 151 double cpuscale[sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0])]; 152 FILE *outfp; 153 154 void findsym(findsym_t, char *, uintptr_t *, uintptr_t *); 155 void spawn(int, char **); 156 void display(int, const char *name); 157 void listnames(const name_t *); 158 int matchname(const name_t *, const char *); 159 void makelists(void); 160 void nullsig(int); 161 void usage(void); 162 void resort(int, int); 163 int ncpu(void); 164 165 int 166 main(int argc, char **argv) 167 { 168 int eventtype, locktype, ch, nlfd, sflag, fd, i, pflag; 169 const char *nlistf, *outf; 170 char *lockname, *funcname; 171 const name_t *name; 172 lsenable_t le; 173 double ms; 174 char *p; 175 176 nlistf = NULL; 177 outf = NULL; 178 lockname = NULL; 179 funcname = NULL; 180 eventtype = -1; 181 locktype = -1; 182 nbufs = 0; 183 sflag = 0; 184 pflag = 0; 185 186 while ((ch = getopt(argc, argv, "E:F:L:M:N:T:b:ceflo:pst")) != -1) 187 switch (ch) { 188 case 'E': 189 eventtype = matchname(eventnames, optarg); 190 break; 191 case 'F': 192 funcname = optarg; 193 break; 194 case 'L': 195 lockname = optarg; 196 break; 197 case 'N': 198 nlistf = optarg; 199 break; 200 case 'T': 201 locktype = matchname(locknames, optarg); 202 break; 203 case 'b': 204 nbufs = (int)strtol(optarg, &p, 0); 205 if (!isdigit((u_int)*optarg) || *p != '\0') 206 usage(); 207 break; 208 case 'c': 209 cflag = 1; 210 break; 211 case 'e': 212 listnames(eventnames); 213 break; 214 case 'l': 215 lflag = 1; 216 break; 217 case 'o': 218 outf = optarg; 219 break; 220 case 'p': 221 pflag = 1; 222 break; 223 case 's': 224 sflag = 1; 225 break; 226 case 't': 227 listnames(locknames); 228 break; 229 default: 230 usage(); 231 } 232 argc -= optind; 233 argv += optind; 234 235 if (*argv == NULL) 236 usage(); 237 238 if (outf) { 239 fd = open(outf, O_WRONLY | O_CREAT | O_TRUNC, 0600); 240 if (fd == -1) 241 err(EXIT_FAILURE, "opening %s", outf); 242 outfp = fdopen(fd, "w"); 243 } else 244 outfp = stdout; 245 246 /* 247 * Find the name list for resolving symbol names, and load it into 248 * memory. 249 */ 250 if (nlistf == NULL) { 251 nlfd = open(_PATH_KSYMS, O_RDONLY); 252 nlistf = getbootfile(); 253 } else 254 nlfd = -1; 255 if (nlfd == -1) { 256 if ((nlfd = open(nlistf, O_RDONLY)) < 0) 257 err(EXIT_FAILURE, "cannot open " _PATH_KSYMS " or %s", 258 nlistf); 259 } 260 if (loadsym32(nlfd) != 0) { 261 if (loadsym64(nlfd) != 0) 262 errx(EXIT_FAILURE, "unable to load symbol table"); 263 bin64 = 1; 264 } 265 close(nlfd); 266 267 memset(&le, 0, sizeof(le)); 268 le.le_nbufs = nbufs; 269 270 /* 271 * Set up initial filtering. 272 */ 273 if (lockname != NULL) { 274 findsym(LOCK_BYNAME, lockname, &le.le_lock, NULL); 275 le.le_flags |= LE_ONE_LOCK; 276 } 277 if (!lflag) 278 le.le_flags |= LE_CALLSITE; 279 if (funcname != NULL) { 280 if (lflag) 281 usage(); 282 findsym(FUNC_BYNAME, funcname, &le.le_csstart, &le.le_csend); 283 le.le_flags |= LE_ONE_CALLSITE; 284 } 285 le.le_mask = (eventtype & LB_EVENT_MASK) | (locktype & LB_LOCK_MASK); 286 287 /* 288 * Start tracing. 289 */ 290 if ((lsfd = open(_PATH_DEV_LOCKSTAT, O_RDONLY)) < 0) 291 err(EXIT_FAILURE, "cannot open " _PATH_DEV_LOCKSTAT); 292 if (ioctl(lsfd, IOC_LOCKSTAT_GVERSION, &ch) < 0) 293 err(EXIT_FAILURE, "ioctl"); 294 if (ch != LS_VERSION) 295 errx(EXIT_FAILURE, "incompatible lockstat interface version"); 296 if (ioctl(lsfd, IOC_LOCKSTAT_ENABLE, &le)) 297 err(EXIT_FAILURE, "cannot enable tracing"); 298 299 /* 300 * Execute the traced program. 301 */ 302 spawn(argc, argv); 303 304 /* 305 * Stop tracing, and read the trace buffers from the kernel. 306 */ 307 if (ioctl(lsfd, IOC_LOCKSTAT_DISABLE, &ld) == -1) { 308 if (errno == EOVERFLOW) { 309 warnx("overflowed available kernel trace buffers"); 310 exit(EXIT_FAILURE); 311 } 312 err(EXIT_FAILURE, "cannot disable tracing"); 313 } 314 if ((bufs = malloc(ld.ld_size)) == NULL) 315 err(EXIT_FAILURE, "cannot allocate memory for user buffers"); 316 if (read(lsfd, bufs, ld.ld_size) != ld.ld_size) 317 err(EXIT_FAILURE, "reading from " _PATH_DEV_LOCKSTAT); 318 if (close(lsfd)) 319 err(EXIT_FAILURE, "close(" _PATH_DEV_LOCKSTAT ")"); 320 321 /* 322 * Figure out how to scale the results, and build the lists. For 323 * internal use we convert all times from CPU frequency based to 324 * picoseconds, and values are eventually displayed in ms. 325 */ 326 for (i = 0; i < sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0]); i++) 327 if (ld.ld_freq[i] != 0) 328 cpuscale[i] = PICO / ld.ld_freq[i]; 329 ms = ld.ld_time.tv_sec * MILLI + ld.ld_time.tv_nsec / MICRO; 330 if (pflag) 331 cscale = 1.0 / ncpu(); 332 else 333 cscale = 1.0; 334 cscale *= (sflag ? MILLI / ms : 1.0); 335 tscale = cscale / NANO; 336 nbufs = (int)(ld.ld_size / sizeof(lsbuf_t)); 337 makelists(); 338 339 /* 340 * Display the results. 341 */ 342 fprintf(outfp, "Elapsed time: %.2f seconds.", ms / MILLI); 343 if (sflag || pflag) { 344 fprintf(outfp, " Displaying "); 345 if (pflag) 346 fprintf(outfp, "per-CPU "); 347 if (sflag) 348 fprintf(outfp, "per-second "); 349 fprintf(outfp, "averages."); 350 } 351 putc('\n', outfp); 352 353 for (name = alltypes; name->name != NULL; name++) { 354 if (eventtype != -1 && 355 (name->mask & LB_EVENT_MASK) != eventtype) 356 continue; 357 if (locktype != -1 && 358 (name->mask & LB_LOCK_MASK) != locktype) 359 continue; 360 361 display(name->mask, name->name); 362 } 363 364 if (displayed == 0) 365 fprintf(outfp, "None of the selected events were recorded.\n"); 366 exit(EXIT_SUCCESS); 367 } 368 369 void 370 usage(void) 371 { 372 373 fprintf(stderr, 374 "%s: usage:\n" 375 "%s [options] <command>\n\n" 376 "-b nbuf\t\tset number of event buffers to allocate\n" 377 "-c\t\treport percentage of total events by count, not time\n" 378 "-E evt\t\tdisplay only one type of event\n" 379 "-e\t\tlist event types\n" 380 "-F func\t\tlimit trace to one function\n" 381 "-L lock\t\tlimit trace to one lock (name, or address)\n" 382 "-l\t\ttrace only by lock\n" 383 "-N nlist\tspecify name list file\n" 384 "-o file\t\tsend output to named file, not stdout\n" 385 "-p\t\tshow average count/time per CPU, not total\n" 386 "-s\t\tshow average count/time per second, not total\n" 387 "-T type\t\tdisplay only one type of lock\n" 388 "-t\t\tlist lock types\n", 389 getprogname(), getprogname()); 390 391 exit(EXIT_FAILURE); 392 } 393 394 void 395 nullsig(int junk) 396 { 397 398 (void)junk; 399 } 400 401 void 402 listnames(const name_t *name) 403 { 404 405 for (; name->name != NULL; name++) 406 printf("%s\n", name->name); 407 408 exit(EXIT_SUCCESS); 409 } 410 411 int 412 matchname(const name_t *name, const char *string) 413 { 414 415 for (; name->name != NULL; name++) 416 if (strcasecmp(name->name, string) == 0) 417 return name->mask; 418 419 warnx("unknown type `%s'", string); 420 usage(); 421 return 0; 422 } 423 424 /* 425 * Return the number of CPUs in the running system. 426 */ 427 int 428 ncpu(void) 429 { 430 int rv, mib[2]; 431 size_t varlen; 432 433 mib[0] = CTL_HW; 434 mib[1] = HW_NCPU; 435 varlen = sizeof(rv); 436 if (sysctl(mib, 2, &rv, &varlen, NULL, (size_t)0) < 0) 437 rv = 1; 438 439 return (rv); 440 } 441 442 /* 443 * Call into the ELF parser and look up a symbol by name or by address. 444 */ 445 void 446 findsym(findsym_t find, char *name, uintptr_t *start, uintptr_t *end) 447 { 448 uintptr_t tend; 449 char *p; 450 int rv; 451 452 if (end == NULL) 453 end = &tend; 454 455 if (find == LOCK_BYNAME) { 456 if (isdigit((u_int)name[0])) { 457 *start = (uintptr_t)strtoul(name, &p, 0); 458 if (*p == '\0') 459 return; 460 } 461 } 462 463 if (bin64) 464 rv = findsym64(find, name, start, end); 465 else 466 rv = findsym32(find, name, start, end); 467 468 if (find == FUNC_BYNAME || find == LOCK_BYNAME) { 469 if (rv == -1) 470 errx(EXIT_FAILURE, "unable to find symbol `%s'", name); 471 return; 472 } 473 474 if (rv == -1) 475 sprintf(name, "%016lx", (long)*start); 476 } 477 478 /* 479 * Fork off the child process and wait for it to complete. We trap SIGINT 480 * so that the caller can use Ctrl-C to stop tracing early and still get 481 * useful results. 482 */ 483 void 484 spawn(int argc, char **argv) 485 { 486 pid_t pid; 487 488 switch (pid = fork()) { 489 case 0: 490 close(lsfd); 491 if (execvp(argv[0], argv) == -1) 492 err(EXIT_FAILURE, "cannot exec"); 493 break; 494 case -1: 495 err(EXIT_FAILURE, "cannot fork to exec"); 496 break; 497 default: 498 signal(SIGINT, nullsig); 499 wait(NULL); 500 signal(SIGINT, SIG_DFL); 501 break; 502 } 503 } 504 505 /* 506 * From the kernel supplied data, construct two dimensional lists of locks 507 * and event buffers, indexed by lock type. 508 */ 509 void 510 makelists(void) 511 { 512 lsbuf_t *lb, *lb2, *max; 513 int i, type; 514 lock_t *l; 515 516 for (i = 0; i < LB_NLOCK >> LB_LOCK_SHIFT; i++) 517 TAILQ_INIT(&locklist[i]); 518 519 for (lb = bufs, max = bufs + nbufs; lb < max; lb++) { 520 if (lb->lb_flags == 0) 521 continue; 522 523 /* 524 * Look for a record descibing this lock, and allocate a 525 * new one if needed. 526 */ 527 type = ((lb->lb_flags & LB_LOCK_MASK) >> LB_LOCK_SHIFT) - 1; 528 TAILQ_FOREACH(l, &locklist[type], chain) { 529 if (l->lock == lb->lb_lock) 530 break; 531 } 532 if (l == NULL) { 533 l = (lock_t *)malloc(sizeof(*l)); 534 l->flags = lb->lb_flags; 535 l->lock = lb->lb_lock; 536 l->nbufs = 0; 537 memset(&l->counts, 0, sizeof(l->counts)); 538 memset(&l->times, 0, sizeof(l->times)); 539 TAILQ_INIT(&l->bufs); 540 TAILQ_INSERT_TAIL(&locklist[type], l, chain); 541 } 542 543 /* 544 * Scale the time values per buffer and summarise 545 * times+counts per lock. 546 */ 547 for (i = 0; i < LB_NEVENT; i++) { 548 lb->lb_times[i] *= cpuscale[lb->lb_cpu]; 549 l->counts[i] += lb->lb_counts[i]; 550 l->times[i] += lb->lb_times[i]; 551 } 552 553 /* 554 * Merge same lock+callsite pairs from multiple CPUs 555 * together. 556 */ 557 TAILQ_FOREACH(lb2, &l->bufs, lb_chain.tailq) { 558 if (lb->lb_callsite == lb2->lb_callsite) 559 break; 560 } 561 if (lb2 != NULL) { 562 for (i = 0; i < LB_NEVENT; i++) { 563 lb2->lb_counts[i] += lb->lb_counts[i]; 564 lb2->lb_times[i] += lb->lb_times[i]; 565 } 566 } else { 567 TAILQ_INSERT_HEAD(&l->bufs, lb, lb_chain.tailq); 568 l->nbufs++; 569 } 570 } 571 } 572 573 /* 574 * Re-sort one list of locks / lock buffers by event type. 575 */ 576 void 577 resort(int type, int event) 578 { 579 lsbuf_t *lb, *lb2; 580 locklist_t llist; 581 buflist_t blist; 582 lock_t *l, *l2; 583 584 TAILQ_INIT(&llist); 585 while ((l = TAILQ_FIRST(&locklist[type])) != NULL) { 586 TAILQ_REMOVE(&locklist[type], l, chain); 587 588 /* 589 * Sort the buffers into the per-lock list. 590 */ 591 TAILQ_INIT(&blist); 592 while ((lb = TAILQ_FIRST(&l->bufs)) != NULL) { 593 TAILQ_REMOVE(&l->bufs, lb, lb_chain.tailq); 594 595 lb2 = TAILQ_FIRST(&blist); 596 while (lb2 != NULL) { 597 if (cflag) { 598 if (lb->lb_counts[event] > 599 lb2->lb_counts[event]) 600 break; 601 } else if (lb->lb_times[event] > 602 lb2->lb_times[event]) 603 break; 604 lb2 = TAILQ_NEXT(lb2, lb_chain.tailq); 605 } 606 if (lb2 == NULL) 607 TAILQ_INSERT_TAIL(&blist, lb, lb_chain.tailq); 608 else 609 TAILQ_INSERT_BEFORE(lb2, lb, lb_chain.tailq); 610 } 611 l->bufs = blist; 612 613 /* 614 * Sort this lock into the per-type list, based on the 615 * totals per lock. 616 */ 617 l2 = TAILQ_FIRST(&llist); 618 while (l2 != NULL) { 619 if (cflag) { 620 if (l->counts[event] > l2->counts[event]) 621 break; 622 } else if (l->times[event] > l2->times[event]) 623 break; 624 l2 = TAILQ_NEXT(l2, chain); 625 } 626 if (l2 == NULL) 627 TAILQ_INSERT_TAIL(&llist, l, chain); 628 else 629 TAILQ_INSERT_BEFORE(l2, l, chain); 630 } 631 locklist[type] = llist; 632 } 633 634 /* 635 * Display a summary table for one lock type / event type pair. 636 */ 637 void 638 display(int mask, const char *name) 639 { 640 lock_t *l; 641 lsbuf_t *lb; 642 int event, type; 643 double pcscale, metric; 644 char lname[256], fname[256]; 645 646 type = ((mask & LB_LOCK_MASK) >> LB_LOCK_SHIFT) - 1; 647 if (TAILQ_EMPTY(&locklist[type])) 648 return; 649 650 event = (mask & LB_EVENT_MASK) - 1; 651 resort(type, event); 652 653 fprintf(outfp, "\n-- %s\n\n" 654 "Total%% Count Time/ms Lock Caller\n" 655 "------ ------- --------- ---------------------- ------------------------------\n", 656 name); 657 658 /* 659 * Sum up all events for this type of lock + event. 660 */ 661 pcscale = 0; 662 TAILQ_FOREACH(l, &locklist[type], chain) { 663 if (cflag) 664 pcscale += l->counts[event]; 665 else 666 pcscale += l->times[event]; 667 displayed++; 668 } 669 if (pcscale == 0) 670 pcscale = 100; 671 else 672 pcscale = (100.0 / pcscale); 673 674 /* 675 * For each lock, print a summary total, followed by a breakdown by 676 * caller. 677 */ 678 TAILQ_FOREACH(l, &locklist[type], chain) { 679 if (cflag) 680 metric = l->counts[event]; 681 else 682 metric = l->times[event]; 683 metric *= pcscale; 684 685 findsym(LOCK_BYADDR, lname, &l->lock, NULL); 686 687 if (lflag || l->nbufs > 1) 688 fprintf(outfp, "%6.2f %7d %9.2f %-22s <all>\n", metric, 689 (int)(l->counts[event] * cscale), 690 l->times[event] * tscale, lname); 691 692 if (lflag) 693 continue; 694 695 TAILQ_FOREACH(lb, &l->bufs, lb_chain.tailq) { 696 if (cflag) 697 metric = lb->lb_counts[event]; 698 else 699 metric = lb->lb_times[event]; 700 metric *= pcscale; 701 702 findsym(FUNC_BYADDR, fname, &lb->lb_callsite, NULL); 703 fprintf(outfp, "%6.2f %7d %9.2f %-22s %s\n", metric, 704 (int)(lb->lb_counts[event] * cscale), 705 lb->lb_times[event] * tscale, 706 lname, fname); 707 } 708 } 709 } 710