/*	$NetBSD: tprof.c,v 1.9 2011/02/25 22:35:38 yamt Exp $	*/

/*-
 * Copyright (c)2008,2009,2010 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: tprof.c,v 1.9 2011/02/25 22:35:38 yamt Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

#include <sys/cpu.h>
#include <sys/conf.h>
#include <sys/callout.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/workqueue.h>
#include <sys/queue.h>

#include <dev/tprof/tprof.h>
#include <dev/tprof/tprof_ioctl.h>

/*
 * locking order:
 *	tprof_reader_lock -> tprof_lock
 *	tprof_startstop_lock -> tprof_lock
 */

/*
 * protected by:
 *	L: tprof_lock
 *	R: tprof_reader_lock
 *	S: tprof_startstop_lock
 *	s: writer should hold tprof_startstop_lock and tprof_lock
 *	   reader should hold tprof_startstop_lock or tprof_lock
 */

typedef struct tprof_buf {
	u_int b_used;
	u_int b_size;
	u_int b_overflow;
	u_int b_unused;
	STAILQ_ENTRY(tprof_buf) b_list;
	tprof_sample_t b_data[];
} tprof_buf_t;
#define	TPROF_BUF_BYTESIZE(sz) \
	(sizeof(tprof_buf_t) + (sz) * sizeof(tprof_sample_t))
#define	TPROF_MAX_SAMPLES_PER_BUF	10000

#define	TPROF_MAX_BUF			100

typedef struct {
	tprof_buf_t *c_buf;
	struct work c_work;
	callout_t c_callout;
} __aligned(CACHE_LINE_SIZE) tprof_cpu_t;

typedef struct tprof_backend {
	const char *tb_name;
	const tprof_backend_ops_t *tb_ops;
	LIST_ENTRY(tprof_backend) tb_list;
	int tb_usecount;	/* S: */
} tprof_backend_t;

static kmutex_t tprof_lock;
static bool tprof_running;		/* s: */
static u_int tprof_nworker;		/* L: # of running worker LWPs */
static lwp_t *tprof_owner;
static STAILQ_HEAD(, tprof_buf) tprof_list;	/* L: global buffer list */
static u_int tprof_nbuf_on_list;	/* L: # of buffers on tprof_list */
static struct workqueue *tprof_wq;
static tprof_cpu_t tprof_cpus[MAXCPUS] __aligned(CACHE_LINE_SIZE);
static u_int tprof_samples_per_buf;

static tprof_backend_t *tprof_backend;	/* S: */
static LIST_HEAD(, tprof_backend) tprof_backends =
    LIST_HEAD_INITIALIZER(tprof_backend);	/* S: */

static kmutex_t tprof_reader_lock;
static kcondvar_t tprof_reader_cv;	/* L: */
static off_t tprof_reader_offset;	/* R: */

static kmutex_t tprof_startstop_lock;
static kcondvar_t tprof_cv;		/* L: */

static struct tprof_stat tprof_stat;	/* L: */

static tprof_cpu_t *
tprof_cpu(struct cpu_info *ci)
{

	return &tprof_cpus[cpu_index(ci)];
}

static tprof_cpu_t *
tprof_curcpu(void)
{

	return tprof_cpu(curcpu());
}

static tprof_buf_t *
tprof_buf_alloc(void)
{
	tprof_buf_t *new;
	u_int size = tprof_samples_per_buf;

	new = kmem_alloc(TPROF_BUF_BYTESIZE(size), KM_SLEEP);
	new->b_used = 0;
	new->b_size = size;
	new->b_overflow = 0;
	return new;
}

static void
tprof_buf_free(tprof_buf_t *buf)
{

	kmem_free(buf, TPROF_BUF_BYTESIZE(buf->b_size));
}

static tprof_buf_t *
tprof_buf_switch(tprof_cpu_t *c, tprof_buf_t *new)
{
	tprof_buf_t *old;

	old = c->c_buf;
	c->c_buf = new;
	return old;
}

static tprof_buf_t *
tprof_buf_refresh(void)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *new;

	new = tprof_buf_alloc();
	return tprof_buf_switch(c, new);
}

static void
tprof_worker(struct work *wk, void *dummy)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t *buf;
	bool shouldstop;

	KASSERT(wk == &c->c_work);
	KASSERT(dummy == NULL);

	/*
	 * get a per cpu buffer.
	 */
	buf = tprof_buf_refresh();

	/*
	 * and put it on the global list for read(2).
	 */
	mutex_enter(&tprof_lock);
	shouldstop = !tprof_running;
	if (shouldstop) {
		KASSERT(tprof_nworker > 0);
		tprof_nworker--;
		cv_broadcast(&tprof_cv);
		cv_broadcast(&tprof_reader_cv);
	}
	if (buf->b_used == 0) {
		tprof_stat.ts_emptybuf++;
	} else if (tprof_nbuf_on_list < TPROF_MAX_BUF) {
		tprof_stat.ts_sample += buf->b_used;
		tprof_stat.ts_overflow += buf->b_overflow;
		tprof_stat.ts_buf++;
		STAILQ_INSERT_TAIL(&tprof_list, buf, b_list);
		tprof_nbuf_on_list++;
		buf = NULL;
		cv_broadcast(&tprof_reader_cv);
	} else {
		tprof_stat.ts_dropbuf_sample += buf->b_used;
		tprof_stat.ts_dropbuf++;
	}
	mutex_exit(&tprof_lock);
	if (buf) {
		tprof_buf_free(buf);
	}
	if (!shouldstop) {
		callout_schedule(&c->c_callout, hz);
	}
}

static void
tprof_kick(void *vp)
{
	struct cpu_info * const ci = vp;
	tprof_cpu_t * const c = tprof_cpu(ci);

	workqueue_enqueue(tprof_wq, &c->c_work, ci);
}

static void
tprof_stop1(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	KASSERT(tprof_nworker == 0);

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *old;

		old = tprof_buf_switch(c, NULL);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_destroy(&c->c_callout);
	}
	workqueue_destroy(tprof_wq);
}

static int
tprof_start(const struct tprof_param *param)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int error;
	uint64_t freq;
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (tprof_running) {
		error = EBUSY;
		goto done;
	}

	tb = tprof_backend;
	if (tb == NULL) {
		error = ENOENT;
		goto done;
	}
	if (tb->tb_usecount > 0) {
		error = EBUSY;
		goto done;
	}

	tb->tb_usecount++;
	freq = tb->tb_ops->tbo_estimate_freq();
	tprof_samples_per_buf = MIN(freq * 2, TPROF_MAX_SAMPLES_PER_BUF);

	error = workqueue_create(&tprof_wq, "tprofmv", tprof_worker, NULL,
	    PRI_NONE, IPL_SOFTCLOCK, WQ_MPSAFE | WQ_PERCPU);
	if (error != 0) {
		goto done;
	}

	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);
		tprof_buf_t *new;
		tprof_buf_t *old;

		new = tprof_buf_alloc();
		old = tprof_buf_switch(c, new);
		if (old != NULL) {
			tprof_buf_free(old);
		}
		callout_init(&c->c_callout, CALLOUT_MPSAFE);
		callout_setfunc(&c->c_callout, tprof_kick, ci);
	}

	error = tb->tb_ops->tbo_start(NULL);
	if (error != 0) {
		KASSERT(tb->tb_usecount > 0);
		tb->tb_usecount--;
		tprof_stop1();
		goto done;
	}

	mutex_enter(&tprof_lock);
	tprof_running = true;
	mutex_exit(&tprof_lock);
	for (CPU_INFO_FOREACH(cii, ci)) {
		tprof_cpu_t * const c = tprof_cpu(ci);

		mutex_enter(&tprof_lock);
		tprof_nworker++;
		mutex_exit(&tprof_lock);
		workqueue_enqueue(tprof_wq, &c->c_work, ci);
	}
done:
	return error;
}

static void
tprof_stop(void)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));
	if (!tprof_running) {
		goto done;
	}

	tb = tprof_backend;
	KASSERT(tb->tb_usecount > 0);
	tb->tb_ops->tbo_stop(NULL);
	tb->tb_usecount--;

	mutex_enter(&tprof_lock);
	tprof_running = false;
	cv_broadcast(&tprof_reader_cv);
	while (tprof_nworker > 0) {
		cv_wait(&tprof_cv, &tprof_lock);
	}
	mutex_exit(&tprof_lock);

	tprof_stop1();
done:
	;
}

/*
 * tprof_clear: drain unread samples.
 */

static void
tprof_clear(void)
{
	tprof_buf_t *buf;

	mutex_enter(&tprof_reader_lock);
	mutex_enter(&tprof_lock);
	while ((buf = STAILQ_FIRST(&tprof_list)) != NULL) {
		if (buf != NULL) {
			STAILQ_REMOVE_HEAD(&tprof_list, b_list);
			KASSERT(tprof_nbuf_on_list > 0);
			tprof_nbuf_on_list--;
			mutex_exit(&tprof_lock);
			tprof_buf_free(buf);
			mutex_enter(&tprof_lock);
		}
	}
	KASSERT(tprof_nbuf_on_list == 0);
	mutex_exit(&tprof_lock);
	tprof_reader_offset = 0;
	mutex_exit(&tprof_reader_lock);

	memset(&tprof_stat, 0, sizeof(tprof_stat));
}

static tprof_backend_t *
tprof_backend_lookup(const char *name)
{
	tprof_backend_t *tb;

	KASSERT(mutex_owned(&tprof_startstop_lock));

	LIST_FOREACH(tb, &tprof_backends, tb_list) {
		if (!strcmp(tb->tb_name, name)) {
			return tb;
		}
	}
	return NULL;
}

/* -------------------- backend interfaces */

/*
 * tprof_sample: record a sample on the per-cpu buffer.
 *
 * be careful; can be called in NMI context.
 * we are bluntly assuming that curcpu() and curlwp->l_proc->p_pid are safe.
 */

void
tprof_sample(tprof_backend_cookie_t *cookie, const tprof_frame_info_t *tfi)
{
	tprof_cpu_t * const c = tprof_curcpu();
	tprof_buf_t * const buf = c->c_buf;
	tprof_sample_t *sp;
	const uintptr_t pc = tfi->tfi_pc;
	u_int idx;

	idx = buf->b_used;
	if (__predict_false(idx >= buf->b_size)) {
		buf->b_overflow++;
		return;
	}
	sp = &buf->b_data[idx];
	sp->s_pid = curlwp->l_proc->p_pid;
	sp->s_flags = (tfi->tfi_inkernel) ?
	    TPROF_SAMPLE_INKERNEL : 0;
	sp->s_pc = pc;
	buf->b_used = idx + 1;
}

/*
 * tprof_backend_register:
 */

int
tprof_backend_register(const char *name, const tprof_backend_ops_t *ops,
    int vers)
{
	tprof_backend_t *tb;

	if (vers != TPROF_BACKEND_VERSION) {
		return EINVAL;
	}

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
	if (tb != NULL) {
		mutex_exit(&tprof_startstop_lock);
		return EEXIST;
	}
#if 1 /* XXX for now */
	if (!LIST_EMPTY(&tprof_backends)) {
		mutex_exit(&tprof_startstop_lock);
		return ENOTSUP;
	}
#endif
	tb = kmem_alloc(sizeof(*tb), KM_SLEEP);
	tb->tb_name = name;
	tb->tb_ops = ops;
	tb->tb_usecount = 0;
	LIST_INSERT_HEAD(&tprof_backends, tb, tb_list);
#if 1 /* XXX for now */
	if (tprof_backend == NULL) {
		tprof_backend = tb;
	}
#endif
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

/*
 * tprof_backend_unregister:
 */

int
tprof_backend_unregister(const char *name)
{
	tprof_backend_t *tb;

	mutex_enter(&tprof_startstop_lock);
	tb = tprof_backend_lookup(name);
#if defined(DIAGNOSTIC)
	if (tb == NULL) {
		mutex_exit(&tprof_startstop_lock);
		panic("%s: not found '%s'", __func__, name);
	}
#endif /* defined(DIAGNOSTIC) */
	if (tb->tb_usecount > 0) {
		mutex_exit(&tprof_startstop_lock);
		return EBUSY;
	}
#if 1 /* XXX for now */
	if (tprof_backend == tb) {
		tprof_backend = NULL;
	}
#endif
	LIST_REMOVE(tb, tb_list);
	mutex_exit(&tprof_startstop_lock);

	kmem_free(tb, sizeof(*tb));

	return 0;
}

/* -------------------- cdevsw interfaces */

void tprofattach(int);

static int
tprof_open(dev_t dev, int flags, int type, struct lwp *l)
{

	if (minor(dev) != 0) {
		return EXDEV;
	}
	mutex_enter(&tprof_lock);
	if (tprof_owner != NULL) {
		mutex_exit(&tprof_lock);
		return EBUSY;
	}
	tprof_owner = curlwp;
	mutex_exit(&tprof_lock);

	return 0;
}

static int
tprof_close(dev_t dev, int flags, int type, struct lwp *l)
{

	KASSERT(minor(dev) == 0);

	mutex_enter(&tprof_startstop_lock);
	mutex_enter(&tprof_lock);
	tprof_owner = NULL;
	mutex_exit(&tprof_lock);
	tprof_stop();
	tprof_clear();
	mutex_exit(&tprof_startstop_lock);

	return 0;
}

static int
tprof_read(dev_t dev, struct uio *uio, int flags)
{
	tprof_buf_t *buf;
	size_t bytes;
	size_t resid;
	size_t done;
	int error = 0;

	KASSERT(minor(dev) == 0);
	mutex_enter(&tprof_reader_lock);
	while (uio->uio_resid > 0 && error == 0) {
		/*
		 * take the first buffer from the list.
		 */
		mutex_enter(&tprof_lock);
		buf = STAILQ_FIRST(&tprof_list);
		if (buf == NULL) {
			if (tprof_nworker == 0) {
				mutex_exit(&tprof_lock);
				error = 0;
				break;
			}
			mutex_exit(&tprof_reader_lock);
			error = cv_wait_sig(&tprof_reader_cv, &tprof_lock);
			mutex_exit(&tprof_lock);
			mutex_enter(&tprof_reader_lock);
			continue;
		}
		STAILQ_REMOVE_HEAD(&tprof_list, b_list);
		KASSERT(tprof_nbuf_on_list > 0);
		tprof_nbuf_on_list--;
		mutex_exit(&tprof_lock);

		/*
		 * copy it out.
		 */
		bytes = MIN(buf->b_used * sizeof(tprof_sample_t) -
		    tprof_reader_offset, uio->uio_resid);
		resid = uio->uio_resid;
		error = uiomove((char *)buf->b_data + tprof_reader_offset,
		    bytes, uio);
		done = resid - uio->uio_resid;
		tprof_reader_offset += done;

		/*
		 * if we didn't consume the whole buffer,
		 * put it back to the list.
		 */
		if (tprof_reader_offset <
		    buf->b_used * sizeof(tprof_sample_t)) {
			mutex_enter(&tprof_lock);
			STAILQ_INSERT_HEAD(&tprof_list, buf, b_list);
			tprof_nbuf_on_list++;
			cv_broadcast(&tprof_reader_cv);
			mutex_exit(&tprof_lock);
		} else {
			tprof_buf_free(buf);
			tprof_reader_offset = 0;
		}
	}
	mutex_exit(&tprof_reader_lock);

	return error;
}

static int
tprof_ioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
{
	const struct tprof_param *param;
	int error = 0;

	KASSERT(minor(dev) == 0);

	switch (cmd) {
	case TPROF_IOC_GETVERSION:
		*(int *)data = TPROF_VERSION;
		break;
	case TPROF_IOC_START:
		param = data;
		mutex_enter(&tprof_startstop_lock);
		error = tprof_start(param);
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_STOP:
		mutex_enter(&tprof_startstop_lock);
		tprof_stop();
		mutex_exit(&tprof_startstop_lock);
		break;
	case TPROF_IOC_GETSTAT:
		mutex_enter(&tprof_lock);
		memcpy(data, &tprof_stat, sizeof(tprof_stat));
		mutex_exit(&tprof_lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	return error;
}

const struct cdevsw tprof_cdevsw = {
	.d_open = tprof_open,
	.d_close = tprof_close,
	.d_read = tprof_read,
	.d_write = nowrite,
	.d_ioctl = tprof_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_flag = D_OTHER | D_MPSAFE,
};

void
tprofattach(int nunits)
{

	/* nothing */
}

MODULE(MODULE_CLASS_DRIVER, tprof, NULL);

static void
tprof_driver_init(void)
{

	mutex_init(&tprof_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_reader_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&tprof_startstop_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&tprof_cv, "tprof");
	cv_init(&tprof_reader_cv, "tprof_rd");
	STAILQ_INIT(&tprof_list);
}

static void
tprof_driver_fini(void)
{

	mutex_destroy(&tprof_lock);
	mutex_destroy(&tprof_reader_lock);
	mutex_destroy(&tprof_startstop_lock);
	cv_destroy(&tprof_cv);
	cv_destroy(&tprof_reader_cv);
}

static int
tprof_modcmd(modcmd_t cmd, void *arg)
{

	switch (cmd) {
	case MODULE_CMD_INIT:
		tprof_driver_init();
#if defined(_MODULE)
		{
			devmajor_t bmajor = NODEVMAJOR;
			devmajor_t cmajor = NODEVMAJOR;
			int error;

			error = devsw_attach("tprof", NULL, &bmajor,
			    &tprof_cdevsw, &cmajor);
			if (error) {
				tprof_driver_fini();
				return error;
			}
		}
#endif /* defined(_MODULE) */
		return 0;

	case MODULE_CMD_FINI:
#if defined(_MODULE)
		{
			int error;
			error = devsw_detach(NULL, &tprof_cdevsw);
			if (error) {
				return error;
			}
		}
#endif /* defined(_MODULE) */
		tprof_driver_fini();
		return 0;

	default:
		return ENOTTY;
	}
}
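
/*
 * Illustrative userland usage (editor's sketch, not compiled as part of
 * this file): a minimal consumer driving the tprof device through the
 * ioctls handled by tprof_ioctl() above and reading samples via read(2).
 * The device path "/dev/tprof", the zero-filled tprof_param, and the
 * exact userland header set are assumptions for illustration; the in-tree
 * tprof(8) tool is the real consumer.
 *
 *	#include <sys/ioctl.h>
 *	#include <dev/tprof/tprof.h>
 *	#include <dev/tprof/tprof_ioctl.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct tprof_param param;
 *		struct tprof_stat ts;
 *		tprof_sample_t samples[128];
 *		ssize_t n;
 *		int fd, version;
 *
 *		fd = open("/dev/tprof", O_RDWR);	(assumed device node)
 *		if (fd == -1)
 *			err(EXIT_FAILURE, "open");
 *		if (ioctl(fd, TPROF_IOC_GETVERSION, &version) == -1)
 *			err(EXIT_FAILURE, "TPROF_IOC_GETVERSION");
 *		memset(&param, 0, sizeof(param));	(defaults; assumption)
 *		if (ioctl(fd, TPROF_IOC_START, &param) == -1)
 *			err(EXIT_FAILURE, "TPROF_IOC_START");
 *		(read(2) returns queued per-CPU buffers as a stream of
 *		tprof_sample_t records; a real consumer loops here until
 *		it has collected enough samples.)
 *		n = read(fd, samples, sizeof(samples));
 *		if (n > 0)
 *			printf("%zd bytes of samples\n", n);
 *		if (ioctl(fd, TPROF_IOC_STOP, NULL) == -1)
 *			err(EXIT_FAILURE, "TPROF_IOC_STOP");
 *		if (ioctl(fd, TPROF_IOC_GETSTAT, &ts) == -1)
 *			err(EXIT_FAILURE, "TPROF_IOC_GETSTAT");
 *		close(fd);
 *		return EXIT_SUCCESS;
 *	}
 */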