1 /* $NetBSD: main.c,v 1.5 2006/11/08 23:12:57 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2006 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39 /* 40 * TODO: 41 * 42 * - Need better analysis and tracking of events. 43 * - Should be binary format agnostic, but given that we're likely to be using 44 * ELF for quite a while that's not a big problem. 45 * - Shouldn't have to parse the namelist here. We should use something like 46 * FreeBSD's libelf. 47 * - The way the namelist is searched sucks, is it worth doing something 48 * better? 49 * - Might be nice to record events and replay later, like ktrace/kdump. 50 */ 51 52 #include <sys/cdefs.h> 53 #ifndef lint 54 __RCSID("$NetBSD: main.c,v 1.5 2006/11/08 23:12:57 ad Exp $"); 55 #endif /* not lint */ 56 57 #include <sys/types.h> 58 #include <sys/param.h> 59 #include <sys/time.h> 60 #include <sys/fcntl.h> 61 #include <sys/ioctl.h> 62 #include <sys/wait.h> 63 #include <sys/signal.h> 64 #include <sys/sysctl.h> 65 66 #include <dev/lockstat.h> 67 68 #include <stdio.h> 69 #include <stdlib.h> 70 #include <string.h> 71 #include <limits.h> 72 #include <unistd.h> 73 #include <err.h> 74 #include <paths.h> 75 #include <util.h> 76 #include <ctype.h> 77 #include <errno.h> 78 79 #include "extern.h" 80 81 #define _PATH_DEV_LOCKSTAT "/dev/lockstat" 82 83 #define MILLI 1000.0 84 #define MICRO 1000000.0 85 #define NANO 1000000000.0 86 #define PICO 1000000000000.0 87 88 TAILQ_HEAD(lock_head, lockstruct); 89 typedef struct lock_head locklist_t; 90 TAILQ_HEAD(buf_head, lsbuf); 91 typedef struct buf_head buflist_t; 92 93 typedef struct lockstruct { 94 TAILQ_ENTRY(lockstruct) chain; 95 buflist_t bufs; 96 uintptr_t lock; 97 double times[LB_NEVENT]; 98 uint32_t counts[LB_NEVENT]; 99 u_int flags; 100 u_int nbufs; 101 } lock_t; 102 103 typedef struct name { 104 const char *name; 105 int mask; 106 } name_t; 107 108 const name_t locknames[] = { 109 { "adaptive_mutex", LB_ADAPTIVE_MUTEX }, 110 { "adaptive_rwlock", LB_ADAPTIVE_RWLOCK }, 111 { "spin_mutex", LB_SPIN_MUTEX }, 112 { "spin_rwlock", LB_SPIN_RWLOCK }, 113 { "lockmgr", LB_LOCKMGR }, 114 { NULL, 0 } 115 }; 116 117 const name_t eventnames[] = { 118 { "spin", LB_SPIN }, 119 { "sleep", LB_SLEEP }, 120 { NULL, 0 }, 121 }; 122 123 const name_t alltypes[] = { 124 { "Adaptive mutex spin", LB_ADAPTIVE_MUTEX | LB_SPIN }, 125 { "Adaptive mutex sleep", LB_ADAPTIVE_MUTEX | LB_SLEEP }, 126 { "Adaptive RW lock spin", LB_ADAPTIVE_RWLOCK | LB_SPIN }, 127 { "Adaptive RW lock sleep", LB_ADAPTIVE_RWLOCK | LB_SLEEP }, 128 { "Spin mutex spin", LB_SPIN_MUTEX | LB_SPIN }, 129 { "lockmgr sleep", LB_LOCKMGR | LB_SLEEP }, 130 #ifdef LB_KERNEL_LOCK 131 /* XXX newlock2 */ 132 { "Kernel lock spin", LB_KERNEL_LOCK | LB_SPIN }, 133 #endif 134 { NULL, 0 } 135 }; 136 137 locklist_t locklist[LB_NLOCK >> LB_LOCK_SHIFT]; 138 139 lsbuf_t *bufs; 140 lsdisable_t ld; 141 int lflag; 142 int nbufs; 143 int cflag; 144 int lsfd; 145 int displayed; 146 int bin64; 147 double tscale; 148 double cscale; 149 double cpuscale[sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0])]; 150 FILE *outfp; 151 152 void findsym(findsym_t, char *, uintptr_t *, uintptr_t *); 153 void spawn(int, char **); 154 void display(int, const char *name); 155 void listnames(const name_t *); 156 int matchname(const name_t *, const char *); 157 void makelists(void); 158 void nullsig(int); 159 void usage(void); 160 void resort(int, int); 161 int ncpu(void); 162 163 int 164 main(int argc, char **argv) 165 { 166 int eventtype, locktype, ch, nlfd, sflag, fd, i, pflag; 167 const char *nlistf, *outf; 168 char *lockname, *funcname; 169 const name_t *name; 170 lsenable_t le; 171 double ms; 172 char *p; 173 174 nlistf = NULL; 175 outf = NULL; 176 lockname = NULL; 177 funcname = NULL; 178 eventtype = -1; 179 locktype = -1; 180 nbufs = 0; 181 sflag = 0; 182 pflag = 0; 183 184 while ((ch = getopt(argc, argv, "E:F:L:M:N:T:b:ceflo:pst")) != -1) 185 switch (ch) { 186 case 'E': 187 eventtype = matchname(eventnames, optarg); 188 break; 189 case 'F': 190 funcname = optarg; 191 break; 192 case 'L': 193 lockname = optarg; 194 break; 195 case 'N': 196 nlistf = optarg; 197 break; 198 case 'T': 199 locktype = matchname(locknames, optarg); 200 break; 201 case 'b': 202 nbufs = (int)strtol(optarg, &p, 0); 203 if (!isdigit((u_int)*optarg) || *p != '\0') 204 usage(); 205 break; 206 case 'c': 207 cflag = 1; 208 break; 209 case 'e': 210 listnames(eventnames); 211 break; 212 case 'l': 213 lflag = 1; 214 break; 215 case 'o': 216 outf = optarg; 217 break; 218 case 'p': 219 pflag = 1; 220 break; 221 case 's': 222 sflag = 1; 223 break; 224 case 't': 225 listnames(locknames); 226 break; 227 default: 228 usage(); 229 } 230 argc -= optind; 231 argv += optind; 232 233 if (*argv == NULL) 234 usage(); 235 236 if (outf) { 237 fd = open(outf, O_WRONLY | O_CREAT | O_TRUNC, 0600); 238 if (fd == -1) 239 err(EXIT_FAILURE, "opening %s", outf); 240 outfp = fdopen(fd, "w"); 241 } else 242 outfp = stdout; 243 244 /* 245 * Find the name list for resolving symbol names, and load it into 246 * memory. 247 */ 248 if (nlistf == NULL) { 249 nlfd = open(_PATH_KSYMS, O_RDONLY); 250 nlistf = getbootfile(); 251 } else 252 nlfd = -1; 253 if (nlfd == -1) { 254 if ((nlfd = open(nlistf, O_RDONLY)) < 0) 255 err(EXIT_FAILURE, "cannot open " _PATH_KSYMS " or %s", 256 nlistf); 257 } 258 if (loadsym32(nlfd) != 0) { 259 if (loadsym64(nlfd) != 0) 260 errx(EXIT_FAILURE, "unable to load symbol table"); 261 bin64 = 1; 262 } 263 close(nlfd); 264 265 memset(&le, 0, sizeof(le)); 266 le.le_nbufs = nbufs; 267 268 /* 269 * Set up initial filtering. 270 */ 271 if (lockname != NULL) { 272 findsym(LOCK_BYNAME, lockname, &le.le_lock, NULL); 273 le.le_flags |= LE_ONE_LOCK; 274 } 275 if (!lflag) 276 le.le_flags |= LE_CALLSITE; 277 if (funcname != NULL) { 278 if (lflag) 279 usage(); 280 findsym(FUNC_BYNAME, funcname, &le.le_csstart, &le.le_csend); 281 le.le_flags |= LE_ONE_CALLSITE; 282 } 283 le.le_mask = (eventtype & LB_EVENT_MASK) | (locktype & LB_LOCK_MASK); 284 285 /* 286 * Start tracing. 287 */ 288 if ((lsfd = open(_PATH_DEV_LOCKSTAT, O_RDONLY)) < 0) 289 err(EXIT_FAILURE, "cannot open " _PATH_DEV_LOCKSTAT); 290 if (ioctl(lsfd, IOC_LOCKSTAT_GVERSION, &ch) < 0) 291 err(EXIT_FAILURE, "ioctl"); 292 if (ch != LS_VERSION) 293 errx(EXIT_FAILURE, "incompatible lockstat interface version"); 294 if (ioctl(lsfd, IOC_LOCKSTAT_ENABLE, &le)) 295 err(EXIT_FAILURE, "cannot enable tracing"); 296 297 /* 298 * Execute the traced program. 299 */ 300 spawn(argc, argv); 301 302 /* 303 * Stop tracing, and read the trace buffers from the kernel. 304 */ 305 if (ioctl(lsfd, IOC_LOCKSTAT_DISABLE, &ld) == -1) { 306 if (errno == EOVERFLOW) { 307 warnx("overflowed available kernel trace buffers"); 308 exit(EXIT_FAILURE); 309 } 310 err(EXIT_FAILURE, "cannot disable tracing"); 311 } 312 if ((bufs = malloc(ld.ld_size)) == NULL) 313 err(EXIT_FAILURE, "cannot allocate memory for user buffers"); 314 if (read(lsfd, bufs, ld.ld_size) != ld.ld_size) 315 err(EXIT_FAILURE, "reading from " _PATH_DEV_LOCKSTAT); 316 if (close(lsfd)) 317 err(EXIT_FAILURE, "close(" _PATH_DEV_LOCKSTAT ")"); 318 319 /* 320 * Figure out how to scale the results, and build the lists. For 321 * internal use we convert all times from CPU frequency based to 322 * picoseconds, and values are eventually displayed in ms. 323 */ 324 for (i = 0; i < sizeof(ld.ld_freq) / sizeof(ld.ld_freq[0]); i++) 325 if (ld.ld_freq[i] != 0) 326 cpuscale[i] = PICO / ld.ld_freq[i]; 327 ms = ld.ld_time.tv_sec * MILLI + ld.ld_time.tv_nsec / MICRO; 328 if (pflag) 329 cscale = 1.0 / ncpu(); 330 else 331 cscale = 1.0; 332 cscale *= (sflag ? MILLI / ms : 1.0); 333 tscale = cscale / NANO; 334 nbufs = (int)(ld.ld_size / sizeof(lsbuf_t)); 335 makelists(); 336 337 /* 338 * Display the results. 339 */ 340 fprintf(outfp, "Elapsed time: %.2f seconds.", ms / MILLI); 341 if (sflag || pflag) { 342 fprintf(outfp, " Displaying "); 343 if (pflag) 344 fprintf(outfp, "per-CPU "); 345 if (sflag) 346 fprintf(outfp, "per-second "); 347 fprintf(outfp, "averages."); 348 } 349 putc('\n', outfp); 350 351 for (name = alltypes; name->name != NULL; name++) { 352 if (eventtype != -1 && 353 (name->mask & LB_EVENT_MASK) != eventtype) 354 continue; 355 if (locktype != -1 && 356 (name->mask & LB_LOCK_MASK) != locktype) 357 continue; 358 359 display(name->mask, name->name); 360 } 361 362 if (displayed == 0) 363 fprintf(outfp, "None of the selected events were recorded.\n"); 364 exit(EXIT_SUCCESS); 365 } 366 367 void 368 usage(void) 369 { 370 371 fprintf(stderr, 372 "%s: usage:\n" 373 "%s [options] <command>\n\n" 374 "-b nbuf\t\tset number of event buffers to allocate\n" 375 "-c\t\treport percentage of total events by count, not time\n" 376 "-E evt\t\tdisplay only one type of event\n" 377 "-e\t\tlist event types\n" 378 "-F func\t\tlimit trace to one function\n" 379 "-L lock\t\tlimit trace to one lock (name, or address)\n" 380 "-l\t\ttrace only by lock\n" 381 "-N nlist\tspecify name list file\n" 382 "-o file\t\tsend output to named file, not stdout\n" 383 "-p\t\tshow average count/time per CPU, not total\n" 384 "-s\t\tshow average count/time per second, not total\n" 385 "-T type\t\tdisplay only one type of lock\n" 386 "-t\t\tlist lock types\n", 387 getprogname(), getprogname()); 388 389 exit(EXIT_FAILURE); 390 } 391 392 void 393 nullsig(int junk) 394 { 395 396 (void)junk; 397 } 398 399 void 400 listnames(const name_t *name) 401 { 402 403 for (; name->name != NULL; name++) 404 printf("%s\n", name->name); 405 406 exit(EXIT_SUCCESS); 407 } 408 409 int 410 matchname(const name_t *name, const char *string) 411 { 412 413 for (; name->name != NULL; name++) 414 if (strcasecmp(name->name, string) == 0) 415 return name->mask; 416 417 warnx("unknown type `%s'", string); 418 usage(); 419 return 0; 420 } 421 422 /* 423 * Return the number of CPUs in the running system. 424 */ 425 int 426 ncpu(void) 427 { 428 int rv, mib[2]; 429 size_t varlen; 430 431 mib[0] = CTL_HW; 432 mib[1] = HW_NCPU; 433 varlen = sizeof(rv); 434 if (sysctl(mib, 2, &rv, &varlen, NULL, (size_t)0) < 0) 435 rv = 1; 436 437 return (rv); 438 } 439 440 /* 441 * Call into the ELF parser and look up a symbol by name or by address. 442 */ 443 void 444 findsym(findsym_t find, char *name, uintptr_t *start, uintptr_t *end) 445 { 446 uintptr_t tend; 447 char *p; 448 int rv; 449 450 if (end == NULL) 451 end = &tend; 452 453 if (find == LOCK_BYNAME) { 454 if (isdigit((u_int)name[0])) { 455 *start = (uintptr_t)strtoul(name, &p, 0); 456 if (*p == '\0') 457 return; 458 } 459 } 460 461 if (bin64) 462 rv = findsym64(find, name, start, end); 463 else 464 rv = findsym32(find, name, start, end); 465 466 if (find == FUNC_BYNAME || find == LOCK_BYNAME) { 467 if (rv == -1) 468 errx(EXIT_FAILURE, "unable to find symbol `%s'", name); 469 return; 470 } 471 472 if (rv == -1) 473 sprintf(name, "0x%016lx", (long)*start); 474 } 475 476 /* 477 * Fork off the child process and wait for it to complete. We trap SIGINT 478 * so that the caller can use Ctrl-C to stop tracing early and still get 479 * useful results. 480 */ 481 void 482 spawn(int argc, char **argv) 483 { 484 pid_t pid; 485 486 switch (pid = fork()) { 487 case 0: 488 close(lsfd); 489 if (execvp(argv[0], argv) == -1) 490 err(EXIT_FAILURE, "cannot exec"); 491 break; 492 case -1: 493 err(EXIT_FAILURE, "cannot fork to exec"); 494 break; 495 default: 496 signal(SIGINT, nullsig); 497 wait(NULL); 498 signal(SIGINT, SIG_DFL); 499 break; 500 } 501 } 502 503 /* 504 * From the kernel supplied data, construct two dimensional lists of locks 505 * and event buffers, indexed by lock type. 506 */ 507 void 508 makelists(void) 509 { 510 lsbuf_t *lb, *lb2, *max; 511 int i, type; 512 lock_t *l; 513 514 for (i = 0; i < LB_NLOCK >> LB_LOCK_SHIFT; i++) 515 TAILQ_INIT(&locklist[i]); 516 517 for (lb = bufs, max = bufs + nbufs; lb < max; lb++) { 518 if (lb->lb_flags == 0) 519 continue; 520 521 /* 522 * Look for a record descibing this lock, and allocate a 523 * new one if needed. 524 */ 525 type = ((lb->lb_flags & LB_LOCK_MASK) >> LB_LOCK_SHIFT) - 1; 526 TAILQ_FOREACH(l, &locklist[type], chain) { 527 if (l->lock == lb->lb_lock) 528 break; 529 } 530 if (l == NULL) { 531 l = (lock_t *)malloc(sizeof(*l)); 532 l->flags = lb->lb_flags; 533 l->lock = lb->lb_lock; 534 l->nbufs = 0; 535 memset(&l->counts, 0, sizeof(l->counts)); 536 memset(&l->times, 0, sizeof(l->times)); 537 TAILQ_INIT(&l->bufs); 538 TAILQ_INSERT_TAIL(&locklist[type], l, chain); 539 } 540 541 /* 542 * Scale the time values per buffer and summarise 543 * times+counts per lock. 544 */ 545 for (i = 0; i < LB_NEVENT; i++) { 546 lb->lb_times[i] *= cpuscale[lb->lb_cpu]; 547 l->counts[i] += lb->lb_counts[i]; 548 l->times[i] += lb->lb_times[i]; 549 } 550 551 /* 552 * Merge same lock+callsite pairs from multiple CPUs 553 * together. 554 */ 555 TAILQ_FOREACH(lb2, &l->bufs, lb_chain.tailq) { 556 if (lb->lb_callsite == lb2->lb_callsite) 557 break; 558 } 559 if (lb2 != NULL) { 560 for (i = 0; i < LB_NEVENT; i++) { 561 lb2->lb_counts[i] += lb->lb_counts[i]; 562 lb2->lb_times[i] += lb->lb_times[i]; 563 } 564 } else { 565 TAILQ_INSERT_HEAD(&l->bufs, lb, lb_chain.tailq); 566 l->nbufs++; 567 } 568 } 569 } 570 571 /* 572 * Re-sort one list of locks / lock buffers by event type. 573 */ 574 void 575 resort(int type, int event) 576 { 577 lsbuf_t *lb, *lb2; 578 locklist_t llist; 579 buflist_t blist; 580 lock_t *l, *l2; 581 582 TAILQ_INIT(&llist); 583 while ((l = TAILQ_FIRST(&locklist[type])) != NULL) { 584 TAILQ_REMOVE(&locklist[type], l, chain); 585 586 /* 587 * Sort the buffers into the per-lock list. 588 */ 589 TAILQ_INIT(&blist); 590 while ((lb = TAILQ_FIRST(&l->bufs)) != NULL) { 591 TAILQ_REMOVE(&l->bufs, lb, lb_chain.tailq); 592 593 lb2 = TAILQ_FIRST(&blist); 594 while (lb2 != NULL) { 595 if (cflag) { 596 if (lb->lb_counts[event] > 597 lb2->lb_counts[event]) 598 break; 599 } else if (lb->lb_times[event] > 600 lb2->lb_times[event]) 601 break; 602 lb2 = TAILQ_NEXT(lb2, lb_chain.tailq); 603 } 604 if (lb2 == NULL) 605 TAILQ_INSERT_TAIL(&blist, lb, lb_chain.tailq); 606 else 607 TAILQ_INSERT_BEFORE(lb2, lb, lb_chain.tailq); 608 } 609 l->bufs = blist; 610 611 /* 612 * Sort this lock into the per-type list, based on the 613 * totals per lock. 614 */ 615 l2 = TAILQ_FIRST(&llist); 616 while (l2 != NULL) { 617 if (cflag) { 618 if (l->counts[event] > l2->counts[event]) 619 break; 620 } else if (l->times[event] > l2->times[event]) 621 break; 622 l2 = TAILQ_NEXT(l2, chain); 623 } 624 if (l2 == NULL) 625 TAILQ_INSERT_TAIL(&llist, l, chain); 626 else 627 TAILQ_INSERT_BEFORE(l2, l, chain); 628 } 629 locklist[type] = llist; 630 } 631 632 /* 633 * Display a summary table for one lock type / event type pair. 634 */ 635 void 636 display(int mask, const char *name) 637 { 638 lock_t *l; 639 lsbuf_t *lb; 640 int event, type; 641 double pcscale, metric; 642 char lname[256], fname[256]; 643 644 type = ((mask & LB_LOCK_MASK) >> LB_LOCK_SHIFT) - 1; 645 if (TAILQ_EMPTY(&locklist[type])) 646 return; 647 648 event = (mask & LB_EVENT_MASK) - 1; 649 resort(type, event); 650 651 fprintf(outfp, "\n-- %s\n\n" 652 "Total%% Count Time/ms Lock Caller\n" 653 "------ ------- --------- ---------------------- ------------------------------\n", 654 name); 655 656 /* 657 * Sum up all events for this type of lock + event. 658 */ 659 pcscale = 0; 660 TAILQ_FOREACH(l, &locklist[type], chain) { 661 if (cflag) 662 pcscale += l->counts[event]; 663 else 664 pcscale += l->times[event]; 665 displayed++; 666 } 667 if (pcscale == 0) 668 pcscale = 100; 669 else 670 pcscale = (100.0 / pcscale); 671 672 /* 673 * For each lock, print a summary total, followed by a breakdown by 674 * caller. 675 */ 676 TAILQ_FOREACH(l, &locklist[type], chain) { 677 if (cflag) 678 metric = l->counts[event]; 679 else 680 metric = l->times[event]; 681 metric *= pcscale; 682 683 findsym(LOCK_BYADDR, lname, &l->lock, NULL); 684 685 if (l->nbufs > 1) 686 fprintf(outfp, "%6.2f %7d %9.2f %-22s <all>\n", metric, 687 (int)(l->counts[event] * cscale), 688 l->times[event] * tscale, lname); 689 690 if (lflag) 691 continue; 692 693 TAILQ_FOREACH(lb, &l->bufs, lb_chain.tailq) { 694 if (cflag) 695 metric = lb->lb_counts[event]; 696 else 697 metric = lb->lb_times[event]; 698 metric *= pcscale; 699 700 findsym(FUNC_BYADDR, fname, &lb->lb_callsite, NULL); 701 fprintf(outfp, "%6.2f %7d %9.2f %-22s %s\n", metric, 702 (int)(lb->lb_counts[event] * cscale), 703 lb->lb_times[event] * tscale, 704 lname, fname); 705 } 706 } 707 } 708