/*	$NetBSD: tprof.c,v 1.7 2010/08/11 11:36:02 pgoyette Exp $	*/

/*-
 * Copyright (c)2008,2009 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.7 2010/08/11 11:36:02 pgoyette Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/workqueue.h>
#include <sys/queue.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

/*
 * locking order:
 *	tprof_reader_lock -> tprof_lock
 *	tprof_startstop_lock -> tprof_lock
 */

/*
 * protected by:
 *	L: tprof_lock
 *	R: tprof_reader_lock
 *	S: tprof_startstop_lock
 */

typedef struct {
	uintptr_t s_pc;		/* program counter */
} tprof_sample_t;

typedef struct tprof_buf {
	u_int b_used;
	u_int b_size;
	u_int b_overflow;
	u_int b_unused;
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

#define	TPROF_MAX_BUF			100

typedef struct {
	tprof_buf_t *c_buf;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

typedef struct tprof_backend {
	const char *tb_name;
	const tprof_backend_ops_t *tb_ops;
	LIST_ENTRY(tprof_backend) tb_list;
	int tb_usecount;	/* S: */
} tprof_backend_t;

static kmutex_t tprof_lock;
static bool tprof_running;
static u_int tprof_nworker;		/* L: # of running worker LWPs */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;	/* L: global buffer list */
static u_int tprof_nbuf_on_list;	/* L: # of buffers on tprof_list */
static struct workqueue *tprof_wq;
static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
static u_int tprof_samples_per_buf;

static tprof_backend_t *tprof_backend;	/* S: */
static LIST_HEAD(, tprof_backend) tprof_backends =
    LIST_HEAD_INITIALIZER(tprof_backend);	/* S: */

static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;	/* L: */
static off_t tprof_reader_offset;	/* R: */

static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;		/* L: */

static struct tprof_stat tprof_stat;	/* L: */

static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{

	return &tprof_cpus[cpu_index(ci)];
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}

static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}

/*
 * tprof_worker: per-cpu work; retire the current buffer, hand it to
 * read(2), and reschedule itself while profiling is running.
 */

static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * get a per cpu buffer.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	shouldstop = !tprof_running;
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf) {
		tprof_buf_free(buf);
	}
	if (!shouldstop) {
		callout_schedule(&c->c_callout, hz);
	}
}

/*
 * tprof_kick: callout handler; enqueue the per-cpu work on this cpu.
 */

static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

/*
 * tprof_stop1: free the per-cpu buffers and destroy the workqueue.
 * called after all workers have terminated.
 */

static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	KASSERT(tprof_nworker == 0);

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}

/*
 * tprof_start: start profiling on all cpus using the current backend.
 */

static int
tprof_start(const struct tprof_param *param)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int error;
	uint64_t freq;
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (tprof_running) {
		error = EBUSY;
		goto done;
	}

	tb = tprof_backend;
	if (tb == NULL) {
		error = ENOENT;
		goto done;
	}
	if (tb->tb_usecount > 0) {
		error = EBUSY;
		goto done;
	}

	tb->tb_usecount++;
	freq = tb->tb_ops->tbo_estimate_freq();
	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);

	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
	if (error != 0) {
		/* release the backend so a later start can retry */
		tb->tb_usecount--;
		goto done;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *new;
		tprof_buf_t *old;

		new = tprof_buf_alloc();
		old = tprof_buf_switch(c, new);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_init(&c->c_callout, CALLOUT_MPSAFE);
		callout_setfunc(&c->c_callout, tprof_kick, ci);
	}

	error = tb->tb_ops->tbo_start(NULL);
	if (error != 0) {
		/* release the backend so a later start can retry */
		tb->tb_usecount--;
		tprof_stop1();
		goto done;
	}

	mutex_enter(&tprof_lock);
	tprof_running = true;
	mutex_exit(&tprof_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);

		mutex_enter(&tprof_lock);
		tprof_nworker++;
		mutex_exit(&tprof_lock);
		workqueue_enqueue(tprof_wq, &c->c_work, ci);
	}
done:
	return error;
}

/*
 * tprof_stop: stop profiling and wait for the per-cpu workers to finish.
 */

static void
tprof_stop(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (!tprof_running) {
		goto done;
	}

	tb = tprof_backend;
	KASSERT(tb->tb_usecount > 0);
	tb->tb_ops->tbo_stop(NULL);
	tb->tb_usecount--;

	mutex_enter(&tprof_lock);
	tprof_running = false;
	cv_broadcast(&tprof_reader_cv);
	mutex_exit(&tprof_lock);

	for (CPU_INFO_FOREACH(cii, ci)) {
		mutex_enter(&tprof_lock);
		while (tprof_nworker > 0) {
			cv_wait(&tprof_cv, &tprof_lock);
		}
		mutex_exit(&tprof_lock);
	}

	tprof_stop1();
done:
	;
}

/*
 * tprof_clear: drain unread samples.
 */

static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);
		tprof_buf_free(buf);
		mutex_enter(&tprof_lock);
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}

static tprof_backend_t *
tprof_backend_lookup(const char *name)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	LIST_FOREACH(tb, &tprof_backends, tb_list) {
		if (!strcmp(tb->tb_name, name)) {
			return tb;
		}
	}
	return NULL;
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample on the per-cpu buffer.
 *
 * be careful; can be called in NMI context.
 * we are assuming that curcpu() is safe.
 */

void
tprof_sample(tprof_backend_cookie_t *cookie, const tprof_frame_info_t *tfi)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t * const buf = c->c_buf;
	const uintptr_t pc = tfi->tfi_pc;
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	buf->b_data[idx].s_pc = pc;
	buf->b_used = idx + 1;
}

/*
 * tprof_backend_register: register a sampling backend.
 */

int
tprof_backend_register(const char *name, const tprof_backend_ops_t *ops,
    int vers)
{
	tprof_backend_t *tb;

	if (vers != TPROF_BACKEND_VERSION) {
		return EINVAL;
	}

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
	if (tb != NULL) {
		mutex_exit(&tprof_startstop_lock);
		return EEXIST;
	}
#if 1 /* XXX for now */
	if (!LIST_EMPTY(&tprof_backends)) {
		mutex_exit(&tprof_startstop_lock);
		return ENOTSUP;
	}
#endif
	tb = kmem_alloc(sizeof(*tb), KM_SLEEP);
	tb->tb_name = name;
	tb->tb_ops = ops;
	tb->tb_usecount = 0;
	LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
#if 1 /* XXX for now */
	if (tprof_backend == NULL) {
		tprof_backend = tb;
	}
#endif
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

/*
 * tprof_backend_unregister: unregister a backend by name.
 */

int
tprof_backend_unregister(const char *name)
{
	tprof_backend_t *tb;

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
#if defined(DIAGNOSTIC)
	if (tb == NULL) {
		mutex_exit(&tprof_startstop_lock);
		panic("%s: not found '%s'", __func__, name);
	}
#endif /* defined(DIAGNOSTIC) */
	if (tb->tb_usecount > 0) {
		mutex_exit(&tprof_startstop_lock);
		return EBUSY;
	}
#if 1 /* XXX for now */
	if (tprof_backend == tb) {
		tprof_backend = NULL;
	}
#endif
	LIST_REMOVE(tb, tb_list);
	mutex_exit(&tprof_startstop_lock);

	kmem_free(tb, sizeof(*tb));

	return 0;
}

/* -------------------- cdevsw interfaces */

void tprofattach(int);

static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0) {
		return EXDEV;
	}
	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop();
	tprof_clear();
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * take the first buffer from the list.
		 */
		mutex_enter(&tprof_lock);
		buf = STAILQ_FIRST(&tprof_list);
		if (buf == NULL) {
			if (tprof_nworker == 0) {
				mutex_exit(&tprof_lock);
				error = 0;
				break;
			}
			mutex_exit(&tprof_reader_lock);
			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
			mutex_exit(&tprof_lock);
			mutex_enter(&tprof_reader_lock);
			continue;
		}
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);

		/*
		 * copy it out.
		 */
		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
		    tprof_reader_offset, uio->uio_resid);
		resid = uio->uio_resid;
		error = uiomove((char *)buf->b_data + tprof_reader_offset,
		    bytes, uio);
		done = resid - uio->uio_resid;
		tprof_reader_offset += done;

		/*
		 * if we didn't consume the whole buffer,
		 * put it back to the list.
		 */
		if (tprof_reader_offset <
		    buf->b_used * sizeof(tprof_sample_t)) {
			mutex_enter(&tprof_lock);
			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
			tprof_nbuf_on_list++;
			cv_broadcast(&tprof_reader_cv);
			mutex_exit(&tprof_lock);
		} else {
			tprof_buf_free(buf);
			tprof_reader_offset = 0;
		}
	}
	mutex_exit(&tprof_reader_lock);

	return error;
}

static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	const struct tprof_param *param;
	int error = 0;

	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case TPROF_IOC_GETVERSION:
		*(int *)data = TPROF_VERSION;
		break;
	case TPROF_IOC_START:
		param = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_start(param);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_STOP:
		mutex_enter(&tprof_startstop_lock);
		tprof_stop();
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETSTAT:
		mutex_enter(&tprof_lock);
		memcpy(data, &tprof_stat, sizeof(tprof_stat));
		mutex_exit(&tprof_lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}

const struct cdevsw tprof_cdevsw = {
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_OTHER | D_MPSAFE,
};

void
tprofattach(int nunits)
{

	/* nothing */
}

MODULE(MODULE_CLASS_DRIVER, tprof, NULL);

static void
tprof_driver_init(void)
{

	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&tprof_cv, "tprof");
	cv_init(&tprof_reader_cv, "tprof_rd");
	STAILQ_INIT(&tprof_list);
}

static void
tprof_driver_fini(void)
{

	mutex_destroy(&tprof_lock);
	mutex_destroy(&tprof_reader_lock);
	mutex_destroy(&tprof_startstop_lock);
	cv_destroy(&tprof_cv);
	cv_destroy(&tprof_reader_cv);
}

static int
tprof_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		tprof_driver_init();
#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = NODEVMAJOR;
			int error;

			error = devsw_attach("tprof", NULL, &bmajor,
			    &tprof_cdevsw, &cmajor);
			if (error) {
				tprof_driver_fini();
				return error;
			}
		}
#endif /* defined(_MODULE) */
		return 0;

	case MODULE_CMD_FINI:
#if defined(_MODULE)
		{
			int error;
			error = devsw_detach(NULL, &tprof_cdevsw);
			if (error) {
				return error;
			}
		}
#endif /* defined(_MODULE) */
		tprof_driver_fini();
		return 0;

	default:
		return ENOTTY;
	}
}
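#if 0
/*
 * Illustrative userland sketch only, kept out of the build: roughly how a
 * tool could drive the cdevsw interface above (open, TPROF_IOC_START,
 * read(2) of the buffers queued by tprof_worker(), TPROF_IOC_STOP).
 * The "/dev/tprof" path, the zero-filled tprof_param, and treating each
 * sample as a bare program-counter word are assumptions drawn from this
 * file, not a description of the real tprof(8) utility.
 */
#include <sys/ioctl.h>
#include <dev/tprof/tprof_ioctl.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct tprof_param param;
	uintptr_t pc[512];	/* assumes the tprof_sample_t layout above */
	ssize_t i, n;
	int fd, version;

	fd = open("/dev/tprof", O_RDONLY);	/* tprof_open: minor 0 only */
	if (fd == -1)
		err(1, "open");
	if (ioctl(fd, TPROF_IOC_GETVERSION, &version) == -1)
		err(1, "TPROF_IOC_GETVERSION");

	memset(&param, 0, sizeof(param));	/* contents unused by tprof_start */
	if (ioctl(fd, TPROF_IOC_START, &param) == -1)
		err(1, "TPROF_IOC_START");

	/* read one batch of sampled program counters and print them */
	n = read(fd, pc, sizeof(pc));
	if (n == -1)
		err(1, "read");
	for (i = 0; i < n / (ssize_t)sizeof(pc[0]); i++)
		printf("%p\n", (void *)pc[i]);

	if (ioctl(fd, TPROF_IOC_STOP, NULL) == -1)
		err(1, "TPROF_IOC_STOP");
	close(fd);	/* tprof_close also stops and drains unread samples */
	return 0;
}
#endif /* 0 */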